2 months ago · 98a7d66d28
--- a/tests/unittest/pdfs/test.pdf
+++ b/tests/unittest/pdfs/test.pdf
--- a/tests/unittest/test_e2e.py
+++ b/tests/unittest/test_e2e.py
@@ -72,7 +72,7 @@ def test_pipeline_with_two_config():
 
															     res_json_path = (
														
 
															         Path(__file__).parent / "output" / "test" / "txt" / "test_content_list.json"
														
 
															     ).as_posix()
														
 
															-    assert_content(res_json_path)
														
 
															+    assert_content(res_json_path, parse_method="txt")
														
 
															     infer_results, all_image_lists, all_pdf_docs, lang_list, ocr_enabled_list = (
														
 
															         pipeline_doc_analyze(
														
 
															             pdf_bytes_list,
														
@@ -93,7 +93,7 @@ def test_pipeline_with_two_config():
 
															     res_json_path = (
														
 
															         Path(__file__).parent / "output" / "test" / "ocr" / "test_content_list.json"
														
 
															     ).as_posix()
														
 
															-    assert_content(res_json_path)
														
 
															+    assert_content(res_json_path, parse_method="ocr")
														
 
															 def test_vlm_transformers_with_default_config():
														
@@ -164,7 +164,7 @@ def test_vlm_transformers_with_default_config():
 
															         res_json_path = (
														
 
															             Path(__file__).parent / "output" / "test" / "vlm" / "test_content_list.json"
														
 
															         ).as_posix()
														
 
															-        assert_content(res_json_path)
														
 
															+        assert_content(res_json_path, parse_method="vlm")
														
 
															 def write_infer_result(
														
@@ -238,7 +238,7 @@ def validate_html(html_content):
 
															         return False
														
 
															-def assert_content(content_path):
														
 
															+def assert_content(content_path, parse_method="txt"):
														
 
															     content_list = []
														
 
															     with open(content_path, "r", encoding="utf-8") as file:
														
 
															         content_list = json.load(file)
														
@@ -267,19 +267,15 @@ def assert_content(content_path):
 
															                 )
														
 
															                 assert validate_html(content_dict["table_body"])
														
 
															                 target_str_list = [
														
 
															-                    "Linear Regression",
														
 
															+                    "Model",
														
 
															+                    "Testing",
														
 
															+                    "Error",
														
 
															+                    "Linear",
														
 
															+                    "Regression",
														
 
															                     "0.98740",
														
 
															                     "1321.2",
														
 
															-                    "2-order Polynomial",
														
 
															-                    "0.99906",
														
 
															-                    "26.4",
														
 
															-                    "3-order Polynomial",
														
 
															-                    "0.99913",
														
 
															-                    "101.2",
														
 
															-                    "4-order Polynomial",
														
 
															-                    "0.99914",
														
 
															-                    "94.1",
														
 
															-                    "Gray Prediction",
														
 
															+                    "Gray",
														
 
															+                    "Prediction",
														
 
															                     "0.00617",
														
 
															                     "687",
														
 
															                 ]
														
@@ -287,8 +283,12 @@ def assert_content(content_path):
 
															                 for target_str in target_str_list:
														
 
															                     if target_str in content_dict["table_body"]:
														
 
															                         correct_count += 1
														
 
															-
														
 
															-                assert correct_count > 0.9 * len(target_str_list)
														
 
															+                if parse_method == "txt" or parse_method == "ocr":
														
 
															+                    assert correct_count > 0.9 * len(target_str_list)
														
 
															+                elif parse_method == "vlm":
														
 
															+                    assert correct_count > 0.7 * len(target_str_list)
														
 
															+                else:
														
 
															+                    assert False
														
 
															             # 公式校验，检测是否含有公式元素
														
 
															             case "equation":
														
 
															                 type_set.add("equation")