
[New method] Table recognition pipeline v2 for PaddleX 3.0 (#3978)

* table_pipe_v2 for pdx3.0

* Fix bug

* resolve conflicts

---------

Co-authored-by: Bobholamovic <bob1998425@hotmail.com>
Liu Jiaxuan · 6 months ago
commit 44e4bf2e09
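
For context, a minimal usage sketch (not part of this commit): it exercises the predict-time switches that the changes below rename and reorder. The input path, output directory, and result-saving helpers are assumptions based on the standard PaddleX 3.0 pipeline API.

```python
# Minimal sketch, not from this commit: the keyword names match the switches
# documented in the diffs below; the file paths and save helpers are assumed
# to follow the standard PaddleX 3.0 result API.
from paddlex import create_pipeline

pipeline = create_pipeline(pipeline="table_recognition_v2")

output = pipeline.predict(
    "table.jpg",                                 # placeholder input image
    use_table_orientation_classify=True,         # new orientation-classification step
    use_ocr_results_with_table_cells=True,       # renamed from use_table_cells_ocr_results
    use_e2e_wired_table_rec_model=False,
    use_e2e_wireless_table_rec_model=False,
    use_wired_table_cells_trans_to_html=False,
    use_wireless_table_cells_trans_to_html=False,
)

for res in output:
    res.print()                    # console summary
    res.save_to_img("./output/")   # visualization
    res.save_to_xlsx("./output/")  # table exported as a spreadsheet
    res.save_to_html("./output/")  # table exported as HTML
```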

+ 12 - 12
docs/pipeline_usage/tutorials/ocr_pipelines/PP-StructureV3.en.md

@@ -1625,39 +1625,39 @@ To remove the page limit, please add the following configuration to the pipeline
 <td>No</td>
 </tr>
 <tr>
-<td><code>useOcrResultsWithTableCells</code></td>
+<td><code>useWiredTableCellsTransToHtml</code></td>
 <td><code>boolean</code></td>
-<td>Please refer to the description of the <code>use_ocr_results_with_table_cells</code> parameter of the pipeline object's <code>predict</code> method.</td>
+<td>Please refer to the description of the <code>use_wired_table_cells_trans_to_html</code> parameter of the pipeline object's <code>predict</code> method.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>useE2eWiredTableRecModel</code></td>
+<td><code>useWirelessTableCellsTransToHtml</code></td>
 <td><code>boolean</code></td>
-<td>Please refer to the description of the <code>use_e2e_wired_table_rec_model</code> parameter of the pipeline object's <code>predict</code> method.</td>
+<td>Please refer to the description of the <code>use_wireless_table_cells_trans_to_html</code> parameter of the pipeline object's <code>predict</code> method.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>useE2eWirelessTableRecModel</code></td>
+<td><code>useTableOrientationClassify</code></td>
 <td><code>boolean</code></td>
-<td>Please refer to the description of the <code>use_e2e_wireless_table_rec_model</code> parameter of the pipeline object's <code>predict</code> method.</td>
+<td>Please refer to the description of the <code>use_table_orientation_classify</code> parameter of the pipeline object's <code>predict</code> method.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>useWiredTableCellsTransToHtml</code></td>
+<td><code>useOcrResultsWithTableCells</code></td>
 <td><code>boolean</code></td>
-<td>Please refer to the description of the <code>use_wired_table_cells_trans_to_html</code> parameter of the pipeline object's <code>predict</code> method.</td>
+<td>Please refer to the description of the <code>use_ocr_results_with_table_cells</code> parameter of the pipeline object's <code>predict</code> method.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>useWirelessTableCellsTransToHtml</code></td>
+<td><code>useE2eWiredTableRecModel</code></td>
 <td><code>boolean</code></td>
-<td>Please refer to the description of the <code>use_wireless_table_cells_trans_to_html</code> parameter of the pipeline object's <code>predict</code> method.</td>
+<td>Please refer to the description of the <code>use_e2e_wired_table_rec_model</code> parameter of the pipeline object's <code>predict</code> method.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>useTableOrientationClassify</code></td>
+<td><code>useE2eWirelessTableRecModel</code></td>
 <td><code>boolean</code></td>
-<td>Please refer to the description of the <code>use_table_orientation_classify</code> parameter of the pipeline object's <code>predict</code> method.</td>
+<td>Please refer to the description of the <code>use_e2e_wireless_table_rec_model</code> parameter of the pipeline object's <code>predict</code> method.</td>
 <td>No</td>
 </tr>
 </tbody>
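
The reordered rows above are request-body booleans of the PP-StructureV3 service. A hedged client sketch follows: only the camelCase switches come from the table above; the endpoint path, the file/fileType fields, and the response shape are assumptions about the usual PaddleX basic-serving setup.

```python
# Hedged client sketch. Only the boolean switches are taken from the table
# above; API_URL, "file"/"fileType", and the "result" key are assumptions
# about the usual PaddleX basic-serving request/response shape.
import base64

import requests

API_URL = "http://localhost:8080/layout-parsing"  # assumed PP-StructureV3 endpoint

with open("demo.jpg", "rb") as f:
    file_data = base64.b64encode(f.read()).decode("ascii")

payload = {
    "file": file_data,
    "fileType": 1,  # assumed convention: 1 = image
    "useWiredTableCellsTransToHtml": False,
    "useWirelessTableCellsTransToHtml": False,
    "useTableOrientationClassify": True,
    "useOcrResultsWithTableCells": True,
    "useE2eWiredTableRecModel": False,
    "useE2eWirelessTableRecModel": True,
}

response = requests.post(API_URL, json=payload)
response.raise_for_status()
result = response.json()["result"]
```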

+ 18 - 18
docs/pipeline_usage/tutorials/ocr_pipelines/PP-StructureV3.md

@@ -1571,24 +1571,6 @@ for res in output:
 <td>否</td>
 </tr>
 <tr>
-<td><code>useOcrResultsWithTableCells</code></td>
-<td><code>boolean</code></td>
-<td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_ocr_results_with_table_cells</code> 参数相关说明。</td>
-<td>否</td>
-</tr>
-<tr>
-<td><code>useE2eWiredTableRecModel</code></td>
-<td><code>boolean</code></td>
-<td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_e2e_wired_table_rec_model</code> 参数相关说明。</td>
-<td>否</td>
-</tr>
-<tr>
-<td><code>useE2eWirelessTableRecModel</code></td>
-<td><code>boolean</code></td>
-<td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_e2e_wireless_table_rec_model</code> 参数相关说明。</td>
-<td>否</td>
-</tr>
-<tr>
 <td><code>useWiredTableCellsTransToHtml</code></td>
 <td><code>boolean</code></td>
 <td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_wired_table_cells_trans_to_html</code> 参数相关说明。</td>
@@ -1606,6 +1588,24 @@ for res in output:
 <td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_table_orientation_classify</code> 参数相关说明。</td>
 <td>No</td>
 </tr>
+<tr>
+<td><code>useOcrResultsWithTableCells</code></td>
+<td><code>boolean</code></td>
+<td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_ocr_results_with_table_cells</code> 参数相关说明。</td>
+<td>否</td>
+</tr>
+<tr>
+<td><code>useE2eWiredTableRecModel</code></td>
+<td><code>boolean</code></td>
+<td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_e2e_wired_table_rec_model</code> 参数相关说明。</td>
+<td>否</td>
+</tr>
+<tr>
+<td><code>useE2eWirelessTableRecModel</code></td>
+<td><code>boolean</code></td>
+<td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_e2e_wireless_table_rec_model</code> 参数相关说明。</td>
+<td>否</td>
+</tr>
 </tbody>
 </table>
 <ul>

+ 6 - 6
docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_v2.en.md

@@ -1365,12 +1365,6 @@ To remove the page limit, please add the following configuration to the pipeline
 <td>No</td>
 </tr>
 <tr>
-<td><code>useOcrResultsWithTableCells</code></td>
-<td><code>boolean</code></td>
-<td>Please refer to the description of the <code>use_ocr_results_with_table_cells</code> parameter of the pipeline object's <code>predict</code> method.</td>
-<td>No</td>
-</tr>
-<tr>
 <td><code>useE2eWiredTableRecModel</code></td>
 <td><code>boolean</code></td>
 <td>Please refer to the description of the <code>use_e2e_wired_table_rec_model</code> parameter of the pipeline object's <code>predict</code> method.</td>
@@ -1400,6 +1394,12 @@ To remove the page limit, please add the following configuration to the pipeline
 <td>Please refer to the description of the <code>use_table_orientation_classify</code> parameter of the pipeline object's <code>predict</code> method.</td>
 <td>No</td>
 </tr>
+<tr>
+<td><code>useOcrResultsWithTableCells</code></td>
+<td><code>boolean</code></td>
+<td>Please refer to the description of the <code>use_ocr_results_with_table_cells</code> parameter of the pipeline object's <code>predict</code> method.</td>
+<td>No</td>
+</tr>
 </tbody>
 </table>
 <ul>

+ 6 - 6
docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_v2.md

@@ -1371,12 +1371,6 @@ for res in output:
 <td>否</td>
 </tr>
 <tr>
-<td><code>useOcrResultsWithTableCells</code></td>
-<td><code>boolean</code></td>
-<td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_ocr_results_with_table_cells</code> 参数相关说明。</td>
-<td>否</td>
-</tr>
-<tr>
 <td><code>useE2eWiredTableRecModel</code></td>
 <td><code>boolean</code></td>
 <td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_e2e_wired_table_rec_model</code> 参数相关说明。</td>
@@ -1406,6 +1400,12 @@ for res in output:
 <td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_table_orientation_classify</code> 参数相关说明。</td>
 <td>No</td>
 </tr>
+<tr>
+<td><code>useOcrResultsWithTableCells</code></td>
+<td><code>boolean</code></td>
+<td>请参阅产线对象中 <code>predict</code> 方法的 <code>use_ocr_results_with_table_cells</code> 参数相关说明。</td>
+<td>否</td>
+</tr>
 </tbody>
 </table>
 <ul>

+ 0 - 2
paddlex/configs/pipelines/PP-StructureV3.yaml

@@ -143,7 +143,6 @@ SubPipelines:
 
   TableRecognition:
     pipeline_name: table_recognition_v2
-    batch_size: 8
     use_layout_detection: False
     use_doc_preprocessor: False
     use_ocr_model: False
@@ -175,7 +174,6 @@ SubPipelines:
     SubPipelines:
       GeneralOCR:
         pipeline_name: OCR
-        batch_size: 8
         text_type: general
         use_doc_preprocessor: False
         use_textline_orientation: True

+ 5 - 0
paddlex/configs/pipelines/table_recognition_v2.yaml

@@ -11,6 +11,11 @@ SubModules:
     model_name: PP-DocLayout-L
     model_dir: null
   
+  TableOrientationClassify:
+    module_name: doc_text_orientation
+    model_name: PP-LCNet_x1_0_doc_ori
+    model_dir: null
+  
   TableClassification:
     module_name: table_classification
     model_name: PP-LCNet_x1_0_table_cls
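
For illustration, a hedged sketch of running the pipeline from a locally edited copy of this config, for example to point the new TableOrientationClassify submodule at a fine-tuned model_dir. It assumes create_pipeline accepts a local config path; that behaviour is not shown in this diff.

```python
# Hedged sketch: assumes create_pipeline can load a local copy of
# table_recognition_v2.yaml in which TableOrientationClassify.model_dir has
# been pointed at a fine-tuned PP-LCNet_x1_0_doc_ori model.
from paddlex import create_pipeline

pipeline = create_pipeline(pipeline="./table_recognition_v2.yaml")
output = pipeline.predict("table.jpg", use_table_orientation_classify=True)
for res in output:
    res.print()
```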

+ 10 - 12
paddlex/inference/pipelines/table_recognition/pipeline.py

@@ -288,7 +288,7 @@ class _TableRecognitionPipeline(BasePipeline):
         image_array: np.ndarray,
         overall_ocr_res: OCRResult,
         table_box: list,
-        use_table_cells_ocr_results: bool = False,
+        use_ocr_results_with_table_cells: bool = False,
         flag_find_nei_text: bool = True,
         cell_sort_by_y_projection: bool = False,
     ) -> SingleTableRecognitionResult:
@@ -300,17 +300,15 @@ class _TableRecognitionPipeline(BasePipeline):
             overall_ocr_res (OCRResult): Overall OCR result obtained after running the OCR pipeline.
                 The overall OCR results containing text recognition information.
             table_box (list): The table box coordinates.
-            use_table_cells_ocr_results (bool): whether to use OCR results with cells.
+            use_ocr_results_with_table_cells (bool): whether to use OCR results with cells.
             flag_find_nei_text (bool): Whether to find neighboring text.
             cell_sort_by_y_projection (bool): Whether to sort the matched OCR boxes by y-projection.
         Returns:
             SingleTableRecognitionResult: single table recognition result.
         """
         table_structure_pred = next(self.table_structure_model(image_array))
-        if use_table_cells_ocr_results == True:
-            table_cells_result = list(
-                map(lambda arr: arr.tolist(), table_structure_pred["bbox"])
-            )
+        if use_ocr_results_with_table_cells == True:
+            table_cells_result = table_structure_pred["bbox"]
             table_cells_result = [
                 [rect[0], rect[1], rect[4], rect[5]] for rect in table_cells_result
             ]
@@ -324,7 +322,7 @@ class _TableRecognitionPipeline(BasePipeline):
             table_structure_pred,
             overall_ocr_res,
             cells_texts_list,
-            use_table_cells_ocr_results,
+            use_ocr_results_with_table_cells,
             cell_sort_by_y_projection=cell_sort_by_y_projection,
         )
         neighbor_text = ""
@@ -353,7 +351,7 @@ class _TableRecognitionPipeline(BasePipeline):
         text_det_box_thresh: Optional[float] = None,
         text_det_unclip_ratio: Optional[float] = None,
         text_rec_score_thresh: Optional[float] = None,
-        use_table_cells_ocr_results: bool = False,
+        use_ocr_results_with_table_cells: bool = False,
         cell_sort_by_y_projection: Optional[bool] = None,
         **kwargs,
     ) -> TableRecognitionResult:
@@ -369,7 +367,7 @@ class _TableRecognitionPipeline(BasePipeline):
                 It will be used if it is not None and use_ocr_model is False.
             layout_det_res (DetResult): The layout detection result.
                 It will be used if it is not None and use_layout_detection is False.
-            use_table_cells_ocr_results (bool): whether to use OCR results with cells.
+            use_ocr_results_with_table_cells (bool): whether to use OCR results with cells.
             cell_sort_by_y_projection (bool): Whether to sort the matched OCR boxes by y-projection.
             **kwargs: Additional keyword arguments.
 
@@ -419,7 +417,7 @@ class _TableRecognitionPipeline(BasePipeline):
                         text_rec_score_thresh=text_rec_score_thresh,
                     )
                 )
-            elif use_table_cells_ocr_results == True:
+            elif use_ocr_results_with_table_cells == True:
                 assert self.general_ocr_config_bak != None
                 self.general_ocr_pipeline = self.create_pipeline(
                     self.general_ocr_config_bak
@@ -435,7 +433,7 @@ class _TableRecognitionPipeline(BasePipeline):
                     doc_preprocessor_image,
                     overall_ocr_res,
                     table_box,
-                    use_table_cells_ocr_results,
+                    use_ocr_results_with_table_cells,
                     flag_find_nei_text=False,
                     cell_sort_by_y_projection=cell_sort_by_y_projection,
                 )
@@ -456,7 +454,7 @@ class _TableRecognitionPipeline(BasePipeline):
                                 crop_img_info["img"],
                                 overall_ocr_res,
                                 table_box,
-                                use_table_cells_ocr_results,
+                                use_ocr_results_with_table_cells,
                                 cell_sort_by_y_projection=cell_sort_by_y_projection,
                             )
                         )

This file's diff was suppressed because it is too large
+ 715 - 236
paddlex/inference/pipelines/table_recognition/pipeline_v2.py


+ 7 - 3
paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py

@@ -413,8 +413,10 @@ def get_table_recognition_res(
     table_structure_result: list,
     table_cells_result: list,
     overall_ocr_res: OCRResult,
+    table_ocr_pred: dict,
     cells_texts_list: list,
     use_table_cells_ocr_results: bool,
+    use_table_cells_split_ocr: bool,
 ) -> SingleTableRecognitionResult:
     """
     Retrieve table recognition result from cropped image info, table structure prediction, and overall OCR result.
@@ -424,6 +426,7 @@ def get_table_recognition_res(
         table_structure_result (list): Predicted table structure.
         table_cells_result (list): Predicted table cells.
         overall_ocr_res (OCRResult): Overall OCR result from the input image.
+        table_ocr_pred (dict): Table OCR result from the input image.
         cells_texts_list (list): OCR results with cells.
         use_table_cells_ocr_results (bool): whether to use OCR results with cells.
 
@@ -432,9 +435,10 @@ def get_table_recognition_res(
     """
 
     table_cells_result = convert_to_four_point_coordinates(table_cells_result)
-
     table_box = np.array([table_box])
-    table_ocr_pred = get_sub_regions_ocr_res(overall_ocr_res, table_box)
+
+    if not (use_table_cells_ocr_results == True and use_table_cells_split_ocr == True):
+        table_ocr_pred = get_sub_regions_ocr_res(overall_ocr_res, table_box)
 
     crop_start_point = [table_box[0][0], table_box[0][1]]
     img_shape = overall_ocr_res["doc_preprocessor_res"]["output_img"].shape[0:2]
@@ -456,7 +460,7 @@ def get_table_recognition_res(
         table_cells_result, crop_start_point, img_shape
     )
 
-    if use_table_cells_ocr_results == True:
+    if use_table_cells_ocr_results == True and use_table_cells_split_ocr == False:
         ocr_dt_boxes = table_cells_result
         ocr_texts_res = cells_texts_list
     else:

+ 3 - 3
paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py

@@ -75,12 +75,12 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
             seal_det_box_thresh=request.sealDetBoxThresh,
             seal_det_unclip_ratio=request.sealDetUnclipRatio,
             seal_rec_score_thresh=request.sealRecScoreThresh,
-            use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
-            use_e2e_wired_table_rec_model=request.useE2eWiredTableRecModel,
-            use_e2e_wireless_table_rec_model=request.useE2eWirelessTableRecModel,
             use_wired_table_cells_trans_to_html=request.useWiredTableCellsTransToHtml,
             use_wireless_table_cells_trans_to_html=request.useWirelessTableCellsTransToHtml,
             use_table_orientation_classify=request.useTableOrientationClassify,
+            use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
+            use_e2e_wired_table_rec_model=request.useE2eWiredTableRecModel,
+            use_e2e_wireless_table_rec_model=request.useE2eWirelessTableRecModel,
         )
 
         layout_parsing_results: List[Dict[str, Any]] = []

+ 1 - 1
paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py

@@ -63,12 +63,12 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
             text_det_box_thresh=request.textDetBoxThresh,
             text_det_unclip_ratio=request.textDetUnclipRatio,
             text_rec_score_thresh=request.textRecScoreThresh,
-            use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
             use_e2e_wired_table_rec_model=request.useE2eWiredTableRecModel,
             use_e2e_wireless_table_rec_model=request.useE2eWirelessTableRecModel,
             use_wired_table_cells_trans_to_html=request.useWiredTableCellsTransToHtml,
             use_wireless_table_cells_trans_to_html=request.useWirelessTableCellsTransToHtml,
             use_table_orientation_classify=request.useTableOrientationClassify,
+            use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
         )
 
         table_rec_results: List[Dict[str, Any]] = []

+ 3 - 3
paddlex/inference/serving/schemas/pp_structurev3.py

@@ -56,12 +56,12 @@ class InferRequest(ocr.BaseInferRequest):
     sealDetBoxThresh: Optional[float] = None
     sealDetUnclipRatio: Optional[float] = None
     sealRecScoreThresh: Optional[float] = None
-    useOcrResultsWithTableCells: bool = False
-    useE2eWiredTableRecModel: bool = False
-    useE2eWirelessTableRecModel: bool = True
     useWiredTableCellsTransToHtml: bool = False
     useWirelessTableCellsTransToHtml: bool = False
     useTableOrientationClassify: bool = True
+    useOcrResultsWithTableCells: bool = True
+    useE2eWiredTableRecModel: bool = False
+    useE2eWirelessTableRecModel: bool = True
 
 
 class MarkdownData(BaseModel):

+ 1 - 1
paddlex/inference/serving/schemas/table_recognition_v2.py

@@ -45,12 +45,12 @@ class InferRequest(ocr.BaseInferRequest):
     textDetBoxThresh: Optional[float] = None
     textDetUnclipRatio: Optional[float] = None
     textRecScoreThresh: Optional[float] = None
-    useOcrResultsWithTableCells: bool = False
     useE2eWiredTableRecModel: bool = False
     useE2eWirelessTableRecModel: bool = False
     useWiredTableCellsTransToHtml: bool = False
     useWirelessTableCellsTransToHtml: bool = False
     useTableOrientationClassify: bool = True
+    useOcrResultsWithTableCells: bool = True
 
 
 class TableRecResult(BaseModel):
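
Note that this schema change flips the default of useOcrResultsWithTableCells from false to true for the table_recognition_v2 service. A hedged sketch of a client opting back out of the new default follows; the endpoint path and request plumbing are assumptions, only the flag and its changed default come from this diff.

```python
# Hedged sketch: with this change, useOcrResultsWithTableCells defaults to
# true on the server, so a client that wants the previous behaviour must now
# send the flag explicitly. The endpoint and "file"/"fileType" fields are
# assumptions about the usual PaddleX serving setup.
import base64

import requests

with open("table.jpg", "rb") as f:
    file_data = base64.b64encode(f.read()).decode("ascii")

payload = {
    "file": file_data,
    "fileType": 1,  # assumed convention: 1 = image
    "useOcrResultsWithTableCells": False,  # opt out of the new default
}

resp = requests.post("http://localhost:8080/table-recognition", json=payload)  # assumed endpoint
resp.raise_for_status()
result = resp.json()["result"]
```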

Some files were not shown because too many files changed in this commit