| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- import json
- import os
- import shutil
- import tempfile
- from magic_pdf.integrations.rag.type import CategoryType
- from magic_pdf.integrations.rag.utils import (
- convert_middle_json_to_layout_elements, inference)
- def test_convert_middle_json_to_layout_elements():
- # setup
- unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
- os.makedirs(unitest_dir, exist_ok=True)
- temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
- os.makedirs(temp_output_dir, exist_ok=True)
- # test
- with open('tests/test_integrations/test_rag/assets/middle.json') as f:
- json_data = json.load(f)
- res = convert_middle_json_to_layout_elements(json_data, temp_output_dir)
- assert len(res) == 1
- assert len(res[0].layout_dets) == 10
- assert res[0].layout_dets[0].anno_id == 0
- assert res[0].layout_dets[0].category_type == CategoryType.text
- assert len(res[0].extra.element_relation) == 3
- # teardown
- shutil.rmtree(temp_output_dir)
- def test_inference():
- asset_dir = 'tests/test_integrations/test_rag/assets'
- # setup
- unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
- os.makedirs(unitest_dir, exist_ok=True)
- temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
- os.makedirs(temp_output_dir, exist_ok=True)
- # test
- res = inference(
- asset_dir + '/one_page_with_table_image.pdf',
- temp_output_dir,
- 'ocr',
- )
- assert res is not None
- assert len(res) == 1
- assert len(res[0].layout_dets) == 10
- assert res[0].layout_dets[0].anno_id == 0
- assert res[0].layout_dets[0].category_type == CategoryType.text
- assert len(res[0].extra.element_relation) == 3
- # teardown
- shutil.rmtree(temp_output_dir)
|