Kaynağa Gözat

重构目录结构

赵小蒙 1 yıl önce
ebeveyn
işleme
7f0371da0e

+ 0 - 0
check_inline_formula.py → othoers/check_inline_formula.py


+ 2 - 2
pdf2json_infer.py → othoers/pdf2json_infer.py

@@ -24,10 +24,10 @@ from validation import cal_edit_distance, format_gt_bbox, label_match, detect_va
 
 from layout.bbox_sort import bbox_sort, CONTENT_IDX, CONTENT_TYPE_IDX
 from pre_proc.detect_images import parse_images          # 获取figures的bbox
-from pdf2text_recogTable import parse_tables           # 获取tables的bbox
+from pre_proc.detect_tables import parse_tables           # 获取tables的bbox
 from pre_proc.detect_equation import parse_equations     # 获取equations的bbox
 # from pdf2text_recogFootnote import parse_footnotes     # 获取footnotes的bbox
-from pdf2text_recogPara import process_blocks_per_page
+from post_proc.detect_para import process_blocks_per_page
 from libs.commons import parse_aws_param, parse_bucket_key, read_file, join_path
 
 

+ 0 - 0
pdf2text_evaluatePdfLayout.py → othoers/pdf2text_evaluatePdfLayout.py


+ 0 - 0
pdf2text_getNumberOfColumn.py → othoers/pdf2text_getNumberOfColumn.py


+ 0 - 0
pdf2text_recogFootnoteLine.py → othoers/pdf2text_recogFootnoteLine.py


+ 1 - 1
pdf2text_recogPara_v2.py → othoers/pdf2text_recogPara_v2.py

@@ -3469,7 +3469,7 @@ class ParaProcessPipeline:
 """
 Run this script to test the function with Command: 
 
-python pdf2text_recogPara.py [pdf_path] [output_pdf_path]
+python detect_para.py [pdf_path] [output_pdf_path]
 
 Params:
 - pdf_path: the path of the pdf file

+ 0 - 0
pdf2text_recogTitle.py → othoers/pdf2text_recogTitle.py


+ 0 - 0
vali_bbox_sort.py → othoers/vali_bbox_sort.py


+ 0 - 0
validation.py → othoers/validation.py


+ 4 - 4
pipeline/pdf_parse_by_model.py

@@ -16,14 +16,14 @@ from libs.markdown_utils import escape_special_markdown_char
 from libs.safe_filename import sanitize_filename
 from libs.vis_utils import draw_bbox_on_page, draw_layout_bbox_on_page
 from pre_proc.detect_images import parse_images
-from pdf2text_recogTable import parse_tables  # 获取tables的bbox
+from pre_proc.detect_tables import parse_tables  # 获取tables的bbox
 from pre_proc.detect_equation import parse_equations  # 获取equations的bbox
-from pdf2text_recogHeader import parse_headers  # 获取headers的bbox
-from pdf2text_recogPageNo import parse_pageNos  # 获取pageNos的bbox
+from pre_proc.detect_header import parse_headers  # 获取headers的bbox
+from pre_proc.detect_page_number import parse_pageNos  # 获取pageNos的bbox
 from pre_proc.detect_footnote import parse_footnotes_by_model, parse_footnotes_by_rule  # 获取footnotes的bbox
 from pre_proc.detect_footer_by_model import parse_footers  # 获取footers的bbox
 
-from pdf2text_recogPara import (
+from post_proc.detect_para import (
     ParaProcessPipeline,
     TitleDetectionException,
     TitleLevelException,

+ 1 - 1
pdf2text_recogPara.py → post_proc/detect_para.py

@@ -3395,7 +3395,7 @@ class ParaProcessPipeline:
 """
 Run this script to test the function with Command: 
 
-python pdf2text_recogPara.py [pdf_path] [output_pdf_path]
+python detect_para.py [pdf_path] [output_pdf_path]
 
 Params:
 - pdf_path: the path of the pdf file

+ 0 - 0
pdf2text_recogHeader.py → pre_proc/detect_header.py


+ 0 - 0
pdf2text_recogPageNo.py → pre_proc/detect_page_number.py


+ 0 - 0
pdf2text_recogTable.py → pre_proc/detect_tables.py


+ 1 - 1
test/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py

@@ -1,6 +1,6 @@
 import unittest
 
-from pdf2text_recogPara import BlockContinuationProcessor
+from post_proc.detect_para import BlockContinuationProcessor
 
 # from ... pdf2text_recogPara import BlockContinuationProcessor # another way to import
 

+ 1 - 1
test/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py

@@ -1,6 +1,6 @@
 import unittest
 
-from pdf2text_recogPara import BlockTerminationProcessor
+from post_proc.detect_para import BlockTerminationProcessor
 
 # from ... pdf2text_recogPara import BlockInnerParasProcessor # another way to import
 

+ 1 - 1
test/test_para/test_pdf2text_recogPara_Common.py

@@ -1,6 +1,6 @@
 import unittest
 
-from pdf2text_recogPara import (
+from post_proc.detect_para import (
     is_bbox_overlap,
     is_in_bbox,
     is_line_right_aligned_from_neighbors,

+ 1 - 1
test/test_para/test_pdf2text_recogPara_TitleProcessor.py

@@ -2,7 +2,7 @@ import json
 import unittest
 
 from utils_for_test_para import UtilsForTestPara
-from pdf2text_recogPara import TitleProcessor
+from post_proc.detect_para import TitleProcessor
 
 # from ... pdf2text_recogPara import * # another way to import