فهرست منبع

test: update test.pdf

Sidney233 2 ماه پیش
والد
کامیت
98a7d66d28
2 فایل تغییر یافته به همراه 17 افزوده شده و 17 حذف شده
  1. BIN
      tests/unittest/pdfs/test.pdf
  2. + 17 - 17
      tests/unittest/test_e2e.py

BIN
tests/unittest/pdfs/test.pdf


+ 17 - 17
tests/unittest/test_e2e.py

@@ -72,7 +72,7 @@ def test_pipeline_with_two_config():
     res_json_path = (
         Path(__file__).parent / "output" / "test" / "txt" / "test_content_list.json"
     ).as_posix()
-    assert_content(res_json_path)
+    assert_content(res_json_path, parse_method="txt")
     infer_results, all_image_lists, all_pdf_docs, lang_list, ocr_enabled_list = (
         pipeline_doc_analyze(
             pdf_bytes_list,
@@ -93,7 +93,7 @@ def test_pipeline_with_two_config():
     res_json_path = (
         Path(__file__).parent / "output" / "test" / "ocr" / "test_content_list.json"
     ).as_posix()
-    assert_content(res_json_path)
+    assert_content(res_json_path, parse_method="ocr")
 
 
 def test_vlm_transformers_with_default_config():
@@ -164,7 +164,7 @@ def test_vlm_transformers_with_default_config():
         res_json_path = (
             Path(__file__).parent / "output" / "test" / "vlm" / "test_content_list.json"
         ).as_posix()
-        assert_content(res_json_path)
+        assert_content(res_json_path, parse_method="vlm")
 
 
 def write_infer_result(
@@ -238,7 +238,7 @@ def validate_html(html_content):
         return False
 
 
-def assert_content(content_path):
+def assert_content(content_path, parse_method="txt"):
     content_list = []
     with open(content_path, "r", encoding="utf-8") as file:
         content_list = json.load(file)
@@ -267,19 +267,15 @@ def assert_content(content_path):
                 )
                 assert validate_html(content_dict["table_body"])
                 target_str_list = [
-                    "Linear Regression",
+                    "Model",
+                    "Testing",
+                    "Error",
+                    "Linear",
+                    "Regression",
                     "0.98740",
                     "1321.2",
-                    "2-order Polynomial",
-                    "0.99906",
-                    "26.4",
-                    "3-order Polynomial",
-                    "0.99913",
-                    "101.2",
-                    "4-order Polynomial",
-                    "0.99914",
-                    "94.1",
-                    "Gray Prediction",
+                    "Gray",
+                    "Prediction",
                     "0.00617",
                     "687",
                 ]
@@ -287,8 +283,12 @@ def assert_content(content_path):
                 for target_str in target_str_list:
                     if target_str in content_dict["table_body"]:
                         correct_count += 1
-
-                assert correct_count > 0.9 * len(target_str_list)
+                if parse_method == "txt" or parse_method == "ocr":
+                    assert correct_count > 0.9 * len(target_str_list)
+                elif parse_method == "vlm":
+                    assert correct_count > 0.7 * len(target_str_list)
+                else:
+                    assert False
             # 公式校验,检测是否含有公式元素
             case "equation":
                 type_set.add("equation")