|
|
@@ -336,6 +336,24 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
|
self.layout_det_model(doc_preprocessor_image),
|
|
|
)
|
|
|
|
|
|
+ if model_settings["use_formula_recognition"]:
|
|
|
+ formula_res_all = next(
|
|
|
+ self.formula_recognition_pipeline(
|
|
|
+ doc_preprocessor_image,
|
|
|
+ use_layout_detection=False,
|
|
|
+ use_doc_orientation_classify=False,
|
|
|
+ use_doc_unwarping=False,
|
|
|
+ layout_det_res=layout_det_res,
|
|
|
+ ),
|
|
|
+ )
|
|
|
+ formula_res_list = formula_res_all["formula_res_list"]
|
|
|
+ else:
|
|
|
+ formula_res_list = []
|
|
|
+
|
|
|
+ for formula_res in formula_res_list:
|
|
|
+ x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
|
|
|
+ doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = 255.0
|
|
|
+
|
|
|
if (
|
|
|
model_settings["use_general_ocr"]
|
|
|
or model_settings["use_table_recognition"]
|
|
|
@@ -351,6 +369,24 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
|
text_rec_score_thresh=text_rec_score_thresh,
|
|
|
),
|
|
|
)
|
|
|
+
|
|
|
+ for formula_res in formula_res_list:
|
|
|
+ x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
|
|
|
+ poly_points = [
|
|
|
+ (x_min, y_min),
|
|
|
+ (x_max, y_min),
|
|
|
+ (x_max, y_max),
|
|
|
+ (x_min, y_max),
|
|
|
+ ]
|
|
|
+ overall_ocr_res["dt_polys"].append(poly_points)
|
|
|
+ overall_ocr_res["rec_texts"].append(
|
|
|
+ f"${formula_res['rec_formula']}$"
|
|
|
+ )
|
|
|
+ overall_ocr_res["rec_boxes"] = np.vstack(
|
|
|
+ (overall_ocr_res["rec_boxes"], [formula_res["dt_polys"]])
|
|
|
+ )
|
|
|
+ overall_ocr_res["rec_polys"].append(poly_points)
|
|
|
+ overall_ocr_res["rec_scores"].append(1)
|
|
|
else:
|
|
|
overall_ocr_res = {}
|
|
|
|
|
|
@@ -398,22 +434,11 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
|
else:
|
|
|
seal_res_list = []
|
|
|
|
|
|
- if model_settings["use_formula_recognition"]:
|
|
|
- formula_res_all = next(
|
|
|
- self.formula_recognition_pipeline(
|
|
|
- doc_preprocessor_image,
|
|
|
- use_layout_detection=False,
|
|
|
- use_doc_orientation_classify=False,
|
|
|
- use_doc_unwarping=False,
|
|
|
- layout_det_res=layout_det_res,
|
|
|
- ),
|
|
|
- )
|
|
|
- formula_res_list = formula_res_all["formula_res_list"]
|
|
|
- else:
|
|
|
- formula_res_list = []
|
|
|
-
|
|
|
- for table_res in table_res_list:
|
|
|
- table_res["layout_bbox"] = table_res["cell_box_list"][0]
|
|
|
+ for formula_res in formula_res_list:
|
|
|
+ x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
|
|
|
+ doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = formula_res[
|
|
|
+ "input_img"
|
|
|
+ ]
|
|
|
|
|
|
structure_res = get_structure_res(
|
|
|
overall_ocr_res,
|