浏览代码

fix: update text formatting in table recovery logic for improved output consistency

myhloli 3 月之前
父节点
当前提交
1cd85ccfae

+ 1 - 1
mineru/model/table/rec/unet_table/main.py

@@ -94,7 +94,7 @@ class WiredTableRecognition:
             t_rec_ocr_list = self.transform_res(cell_box_det_map, polygons, logi_points)
             # 将每个单元格中的ocr识别结果排序和同行合并,输出的html能完整保留文字的换行格式
             t_rec_ocr_list = self.sort_and_gather_ocr_res(t_rec_ocr_list)
-            # cell_box_map =
+
             logi_points = [t_box_ocr["t_logic_box"] for t_box_ocr in t_rec_ocr_list]
             cell_box_det_map = {
                 i: [ocr_box_and_text[1] for ocr_box_and_text in t_box_ocr["t_ocr_res"]]

+ 2 - 1
mineru/model/table/rec/unet_table/utils_table_recover.py

@@ -292,7 +292,8 @@ def plot_html_table(
                     continue
                 if row == row_start and col == col_start:
                     ocr_rec_text = cell_box_map.get(i)
-                    text = "<br>".join(ocr_rec_text)
+                    # text = "<br>".join(ocr_rec_text)
+                    text = "".join(ocr_rec_text)
                     # 如果是起始单元格
                     row_span = row_end - row_start + 1
                     col_span = col_end - col_start + 1