Browse Source

Feat/support rag (#510)

* Create requirements-docker.txt

* feat: update deps to support rag

* feat: add support to rag, add rag_data_reader api for rag integration

* feat: let user retrieve the filename of the processed file

* feat: add projects demo for rag integrations

---------

Co-authored-by: Xiaomeng Zhao <moe@myhloli.com>
Co-authored-by: icecraft <xurui1@pjlab.org.cn>
icecraft 1 year ago
parent
commit
b72d4ebd94

+ 0 - 0
magic_pdf/integrations/__init__.py


+ 0 - 0
magic_pdf/integrations/rag/__init__.py


+ 82 - 0
magic_pdf/integrations/rag/api.py

@@ -0,0 +1,82 @@
+import os
+from pathlib import Path
+
+from loguru import logger
+
+from magic_pdf.integrations.rag.type import (ElementRelation, LayoutElements,
+                                             Node)
+from magic_pdf.integrations.rag.utils import inference
+
+
+class RagPageReader:
+    """Iterable wrapper around the layout elements of a single page.
+
+    Iterating yields one ``Node`` per entry of ``pagedata.layout_dets``, in
+    the same order. ``get_rel_map`` exposes the relations stored in
+    ``pagedata.extra``.
+    """
+
+    def __init__(self, pagedata: LayoutElements):
+        nodes = []
+        for det in pagedata.layout_dets:
+            # Copy only the fields that a Node carries.
+            nodes.append(
+                Node(
+                    category_type=det.category_type,
+                    text=det.text,
+                    image_path=det.image_path,
+                    anno_id=det.anno_id,
+                    latex=det.latex,
+                    html=det.html,
+                ))
+
+        self.o = nodes
+        self.pagedata = pagedata
+
+    def __iter__(self):
+        """Iterate over the ``Node`` objects of this page."""
+        return iter(self.o)
+
+    def get_rel_map(self) -> list[ElementRelation]:
+        """Return the element relations recorded for this page."""
+        return self.pagedata.extra.element_relation
+
+
+class RagDocumentReader:
+    """Iterable wrapper that yields one ``RagPageReader`` per page."""
+
+    def __init__(self, ragdata: list[LayoutElements]):
+        pages = []
+        for page_layout in ragdata:
+            pages.append(RagPageReader(page_layout))
+        self.o = pages
+
+    def __iter__(self):
+        """Iterate over the per-page readers in input order."""
+        return iter(self.o)
+
+
+class DataReader:
+    """Collect PDF files from a path and parse them on demand.
+
+    ``path_or_directory`` is either a single ``.pdf`` file or a directory
+    whose direct children matching ``*.pdf`` are collected. Documents are
+    addressed by their index in ``self.pdfs``.
+    """
+
+    def __init__(self, path_or_directory: str, method: str, output_dir: str):
+        """
+        Args:
+            path_or_directory (str): a pdf file, or a directory of pdf files
+            method (str): parse method forwarded to ``inference``
+            output_dir (str): output directory forwarded to ``inference``
+
+        Raises:
+            ValueError: if path_or_directory is not a directory and does
+                not end with ``.pdf``
+        """
+        self.path_or_directory = path_or_directory
+        self.method = method
+        self.output_dir = output_dir
+        if os.path.isdir(path_or_directory):
+            # Directory listing order is OS dependent; sort so that a given
+            # idx always refers to the same document.
+            self.pdfs = sorted(Path(path_or_directory).glob('*.pdf'))
+        else:
+            # Explicit check instead of ``assert``, which is stripped when
+            # Python runs with -O.
+            if not str(path_or_directory).endswith('.pdf'):
+                raise ValueError('expected a directory or a .pdf file, '
+                                 f'got: {path_or_directory}')
+            self.pdfs = [Path(path_or_directory)]
+
+    def get_documents_count(self) -> int:
+        """Returns the number of collected pdf documents."""
+        return len(self.pdfs)
+
+    def get_document_result(self, idx: int) -> RagDocumentReader | None:
+        """
+        Args:
+            idx (int): the index of documents under the
+                directory path_or_directory
+
+        Returns:
+            RagDocumentReader | None: RagDocumentReader is an iterable object,
+            more details @RagDocumentReader. None is returned (and a message
+            is logged) when idx is out of range or the pdf cannot be parsed.
+        """
+        if idx >= self.get_documents_count() or idx < 0:
+            logger.error(f'invalid idx: {idx}')
+            return None
+        res = inference(str(self.pdfs[idx]), self.output_dir, self.method)
+        if res is None:
+            logger.warning(f'failed to inference pdf {self.pdfs[idx]}')
+            return None
+        return RagDocumentReader(res)
+
+    def get_document_filename(self, idx: int) -> Path:
+        """get the filename of the document.
+
+        Raises:
+            IndexError: if idx is outside the collected documents. Unlike
+                get_document_result, a negative idx is not rejected here and
+                indexes from the end of the list.
+        """
+        return self.pdfs[idx]

+ 82 - 0
magic_pdf/integrations/rag/type.py

@@ -0,0 +1,82 @@
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+
+# rag
+class CategoryType(Enum):  # py310 does not support enum.StrEnum
+    # Category of a layout element; every member's value equals its name.
+    text = 'text'
+    title = 'title'
+    interline_equation = 'interline_equation'
+    image = 'image'
+    image_body = 'image_body'
+    image_caption = 'image_caption'
+    table = 'table'
+    table_body = 'table_body'
+    table_caption = 'table_caption'
+    table_footnote = 'table_footnote'
+
+
+class ElementRelType(Enum):
+    # Kind of relation between two layout elements; only one kind is defined.
+    sibling = 'sibling'
+
+
+class PageInfo(BaseModel):
+    # Index, size and optional image path of one page.
+    # NOTE(review): height must be > 0 while width only has to be >= 0;
+    # confirm whether the asymmetric bounds are intentional.
+    page_no: int = Field(
+        ge=0,
+        description='the index of page, start from zero',
+    )
+    height: int = Field(gt=0, description='the height of page')
+    width: int = Field(ge=0, description='the width of page')
+    image_path: str | None = Field(
+        default=None,
+        description='the image of this page',
+    )
+
+
+class ContentObject(BaseModel):
+    # One layout element of a page: its category, polygon and content.
+    # The category description below is Chinese for "category".
+    category_type: CategoryType = Field(description='类别')
+    poly: list[float] = Field(description=(
+        'Coordinates, need to convert back to PDF coordinates,'
+        ' order is top-left, top-right, bottom-right, bottom-left'
+        ' x,y coordinates'))
+    ignore: bool = Field(
+        default=False,
+        description='whether ignore this object',
+    )
+    text: str | None = Field(
+        default=None,
+        description='text content of the object',
+    )
+    image_path: str | None = Field(
+        default=None,
+        description='path of embedded image',
+    )
+    order: int = Field(
+        default=-1,
+        description='the order of this object within a page',
+    )
+    anno_id: int = Field(default=-1, description='unique id')
+    latex: str | None = Field(default=None, description='latex result')
+    html: str | None = Field(default=None, description='html result')
+
+
+class ElementRelation(BaseModel):
+    # Directed relation between two elements, referenced by their anno ids.
+    source_anno_id: int = Field(
+        default=-1,
+        description='unique id of the source object',
+    )
+    target_anno_id: int = Field(
+        default=-1,
+        description='unique id of the target object',
+    )
+    relation: ElementRelType = Field(
+        description='the relation between source and target element',
+    )
+
+
+class LayoutElementsExtra(BaseModel):
+    # Extra page-level data: the relations between elements of the page.
+    element_relation: list[ElementRelation] = Field(
+        description='the relation between source and target element',
+    )
+
+
+class LayoutElements(BaseModel):
+    # Everything extracted from one page: elements, page info and extras.
+    layout_dets: list[ContentObject] = Field(
+        description='layout element details',
+    )
+    page_info: PageInfo = Field(description='page info')
+    extra: LayoutElementsExtra = Field(description='extra information')
+
+
+# iter data format
+class Node(BaseModel):
+    # Element as handed out by the readers when iterating a page.
+    # The category description below is Chinese for "category".
+    category_type: CategoryType = Field(description='类别')
+    text: str | None = Field(
+        default=None,
+        description='text content of the object',
+    )
+    image_path: str | None = Field(
+        default=None,
+        description='path of embedded image',
+    )
+    anno_id: int = Field(default=-1, description='unique id')
+    latex: str | None = Field(default=None, description='latex result')
+    html: str | None = Field(default=None, description='html result')

+ 285 - 0
magic_pdf/integrations/rag/utils.py

@@ -0,0 +1,285 @@
+import json
+import os
+from pathlib import Path
+
+from loguru import logger
+
+import magic_pdf.model as model_config
+from magic_pdf.dict2md.ocr_mkcontent import merge_para_with_text
+from magic_pdf.integrations.rag.type import (CategoryType, ContentObject,
+                                             ElementRelation, ElementRelType,
+                                             LayoutElements,
+                                             LayoutElementsExtra, PageInfo)
+from magic_pdf.libs.ocr_content_type import BlockType, ContentType
+from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
+from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
+from magic_pdf.tools.common import do_parse, prepare_env
+
+
+def convert_middle_json_to_layout_elements(
+    json_data: dict,
+    output_dir: str,
+) -> list[LayoutElements]:
+    """Convert a parsed middle-json document into per-page layout elements.
+
+    Every entry of ``json_data['pdf_info']`` becomes one ``LayoutElements``.
+    Text, title and interline-equation paragraph blocks become one element
+    each. Image blocks are emitted as body elements followed by caption
+    elements; table blocks as caption, body and footnote elements. When a
+    block has a body and a caption (or, for tables, a footnote), a sibling
+    relation from the last body to the last caption/footnote is recorded.
+    Other block types are skipped.
+
+    Args:
+        json_data (dict): middle json content; only ``pdf_info`` is read
+        output_dir (str): directory joined in front of every span
+            ``image_path``
+
+    Returns:
+        list[LayoutElements]: one item per page, in page order. ``anno_id``
+        is unique across the whole document, ``order`` restarts at zero on
+        every page.
+    """
+    uniq_anno_id = 0
+    res: list[LayoutElements] = []
+
+    for page_no, page_data in enumerate(json_data['pdf_info']):
+        page_info = PageInfo(
+            height=int(page_data['page_size'][1]),
+            width=int(page_data['page_size'][0]),
+            page_no=page_no,
+        )
+        layout_dets: list[ContentObject] = []
+        extra_element_relation: list[ElementRelation] = []
+
+        for para_block in page_data['para_blocks']:
+            para_type = para_block['type']
+            text_category = _text_category(para_type)
+
+            if text_category is not None:
+                _append_element(
+                    layout_dets,
+                    uniq_anno_id,
+                    text_category,
+                    para_block['bbox'],
+                    text=merge_para_with_text(para_block),
+                )
+                uniq_anno_id += 1
+
+            elif para_type == BlockType.Image:
+                # -1 means "not seen"; 0 is a valid anno id, so the checks
+                # below must compare against -1 and not against 0.
+                body_anno_id = caption_anno_id = -1
+
+                for block in para_block['blocks']:
+                    if block['type'] != BlockType.ImageBody:
+                        continue
+                    for span in _iter_spans(block, ContentType.Image):
+                        _append_element(
+                            layout_dets,
+                            uniq_anno_id,
+                            CategoryType.image_body,
+                            block['bbox'],
+                            image_path=os.path.join(output_dir,
+                                                    span['image_path']),
+                        )
+                        body_anno_id = uniq_anno_id
+                        uniq_anno_id += 1
+
+                for block in para_block['blocks']:
+                    if block['type'] != BlockType.ImageCaption:
+                        continue
+                    # Each caption carries only its own text.
+                    _append_element(
+                        layout_dets,
+                        uniq_anno_id,
+                        CategoryType.image_caption,
+                        block['bbox'],
+                        text=merge_para_with_text(block),
+                    )
+                    caption_anno_id = uniq_anno_id
+                    uniq_anno_id += 1
+
+                if body_anno_id != -1 and caption_anno_id != -1:
+                    extra_element_relation.append(
+                        _sibling_relation(body_anno_id, caption_anno_id))
+
+            elif para_type == BlockType.Table:
+                body_anno_id, caption_anno_id, footnote_anno_id = -1, -1, -1
+
+                for block in para_block['blocks']:
+                    if block['type'] != BlockType.TableCaption:
+                        continue
+                    _append_element(
+                        layout_dets,
+                        uniq_anno_id,
+                        CategoryType.table_caption,
+                        block['bbox'],
+                        text=merge_para_with_text(block),
+                    )
+                    caption_anno_id = uniq_anno_id
+                    uniq_anno_id += 1
+
+                for block in para_block['blocks']:
+                    if block['type'] != BlockType.TableBody:
+                        continue
+                    for span in _iter_spans(block, ContentType.Table):
+                        # if processed by table model the span has latex,
+                        # otherwise only the table image is available
+                        if span.get('latex', ''):
+                            payload = {'latex': span['latex']}
+                        else:
+                            payload = {
+                                'image_path':
+                                os.path.join(output_dir, span['image_path'])
+                            }
+                        # The polygon is the body block's own box, matching
+                        # how image bodies are handled.
+                        _append_element(
+                            layout_dets,
+                            uniq_anno_id,
+                            CategoryType.table_body,
+                            block['bbox'],
+                            **payload,
+                        )
+                        body_anno_id = uniq_anno_id
+                        uniq_anno_id += 1
+
+                for block in para_block['blocks']:
+                    if block['type'] != BlockType.TableFootnote:
+                        continue
+                    # Footnote text must not include the caption text.
+                    _append_element(
+                        layout_dets,
+                        uniq_anno_id,
+                        CategoryType.table_footnote,
+                        block['bbox'],
+                        text=merge_para_with_text(block),
+                    )
+                    footnote_anno_id = uniq_anno_id
+                    uniq_anno_id += 1
+
+                if caption_anno_id != -1 and body_anno_id != -1:
+                    extra_element_relation.append(
+                        _sibling_relation(body_anno_id, caption_anno_id))
+
+                if footnote_anno_id != -1 and body_anno_id != -1:
+                    extra_element_relation.append(
+                        _sibling_relation(body_anno_id, footnote_anno_id))
+
+        res.append(
+            LayoutElements(
+                page_info=page_info,
+                layout_dets=layout_dets,
+                extra=LayoutElementsExtra(
+                    element_relation=extra_element_relation),
+            ))
+
+    return res
+
+
+def _text_category(para_type):
+    """Return the category of a plain text-like block type, else None."""
+    if para_type == BlockType.Text:
+        return CategoryType.text
+    if para_type == BlockType.Title:
+        return CategoryType.title
+    if para_type == BlockType.InterlineEquation:
+        return CategoryType.interline_equation
+    return None
+
+
+def _iter_spans(block: dict, span_type):
+    """Yield the spans of ``block`` whose ``type`` equals ``span_type``."""
+    for line in block['lines']:
+        for span in line['spans']:
+            if span['type'] == span_type:
+                yield span
+
+
+def _bbox_to_poly(bbox) -> list[float]:
+    """Expand an (x0, y0, x1, y1) box into its four corner points."""
+    x0, y0, x1, y1 = bbox
+    return [x0, y0, x1, y0, x1, y1, x0, y1]
+
+
+def _append_element(layout_dets: list, anno_id: int, category_type, bbox,
+                    **fields) -> None:
+    """Append a ContentObject whose order is its position in the page list."""
+    layout_dets.append(
+        ContentObject(
+            anno_id=anno_id,
+            category_type=category_type,
+            order=len(layout_dets),
+            poly=_bbox_to_poly(bbox),
+            **fields,
+        ))
+
+
+def _sibling_relation(source_anno_id: int,
+                      target_anno_id: int) -> ElementRelation:
+    """Build a sibling relation from the source to the target element."""
+    return ElementRelation(
+        relation=ElementRelType.sibling,
+        source_anno_id=source_anno_id,
+        target_anno_id=target_anno_id,
+    )
+
+
+def inference(path, output_dir, method):
+    """Parse one pdf and return its pages as layout elements.
+
+    Args:
+        path: path of the pdf file
+        output_dir: output directory; when it is the empty string an
+            ``output`` directory next to ``path`` (or inside it, if ``path``
+            is a directory) is used
+        method: parse method forwarded to ``prepare_env`` and ``do_parse``
+
+    Returns:
+        The result of ``convert_middle_json_to_layout_elements``, or None
+        when reading, parsing or converting raises (the exception is logged).
+    """
+    # Module-level switches on magic_pdf.model, set before parsing.
+    model_config.__use_inside_model__ = True
+    model_config.__model_mode__ = 'full'
+
+    if output_dir == '':
+        parent = path if os.path.isdir(path) else os.path.dirname(path)
+        output_dir = os.path.join(parent, 'output')
+
+    file_name = str(Path(path).stem)
+    local_image_dir, local_md_dir = prepare_env(output_dir, file_name,
+                                                method)
+
+    try:
+        reader = DiskReaderWriter(os.path.dirname(path))
+        pdf_data = reader.read(os.path.basename(path),
+                               AbsReaderWriter.MODE_BIN)
+        # Only the middle json is dumped; it is the input of the conversion.
+        do_parse(
+            output_dir,
+            file_name,
+            pdf_data,
+            [],
+            method,
+            False,
+            f_draw_span_bbox=False,
+            f_draw_layout_bbox=False,
+            f_dump_md=False,
+            f_dump_middle_json=True,
+            f_dump_model_json=False,
+            f_dump_orig_pdf=False,
+            f_dump_content_list=False,
+            f_draw_model_bbox=False,
+        )
+
+        middle_json_fn = os.path.join(local_md_dir,
+                                      f'{file_name}_middle.json')
+        with open(middle_json_fn) as fd:
+            jso = json.load(fd)
+        # The dumped file is only an intermediate artifact.
+        os.remove(middle_json_fn)
+        return convert_middle_json_to_layout_elements(jso, local_image_dir)
+    except Exception as e:
+        logger.exception(e)
+
+    return None
+
+
+if __name__ == '__main__':
+    # Manual smoke test against local sample files; flip the 0/1 guards to
+    # choose which step runs.
+    from pprint import pp
+
+    base_dir = '/opt/data/pdf/resources/samples/'
+    if 0:
+        with open(f'{base_dir}json_outputs/middle.json') as f:
+            d = json.load(f)
+        result = convert_middle_json_to_layout_elements(d, '/tmp')
+        pp(result)
+    if 0:
+        with open(f'{base_dir}json_outputs/middle.3.json') as f:
+            d = json.load(f)
+        result = convert_middle_json_to_layout_elements(d, '/tmp')
+        pp(result)
+
+    if 1:
+        sample_pdf = f'{base_dir}samples/pdf/one_page_with_table_image.pdf'
+        res = inference(sample_pdf, '/tmp/output', 'ocr')
+        pp(res)

+ 2 - 0
magic_pdf/tools/cli_dev.py

@@ -86,6 +86,7 @@ def jsonl(jsonl, method, output_dir):
         pdf_data,
         jso['doc_layout_result'],
         method,
+        False,
         f_dump_content_list=True,
         f_draw_model_bbox=True,
     )
@@ -141,6 +142,7 @@ def pdf(pdf, json_data, output_dir, method):
         pdf_data,
         model_json_list,
         method,
+        False,
         f_dump_content_list=True,
         f_draw_model_bbox=True,
     )

+ 157 - 0
projects/llama_index_rag/README.md

@@ -0,0 +1,157 @@
+## 安装
+
+MinerU
+
+```bash
+git clone https://github.com/opendatalab/MinerU.git
+cd MinerU
+
+conda create -n MinerU python=3.10
+conda activate MinerU
+pip install .[full] --extra-index-url https://wheels.myhloli.com
+```
+
+第三方软件
+
+```bash
+# install
+pip install llama-index-vector-stores-elasticsearch==0.2.0
+pip install llama-index-embeddings-dashscope==0.2.0
+pip install llama-index-core==0.10.68
+pip install einops==0.7.0
+pip install transformers-stream-generator==0.0.5
+pip install accelerate==0.33.0
+
+# uninstall
+pip uninstall transformer-engine
+```
+
+## 环境配置
+
+```
+export DASHSCOPE_API_KEY={some_key}
+export ES_USER={some_es_user}
+export ES_PASSWORD={some_es_password}
+export ES_URL=http://{es_url}:9200
+```
+
+DASHSCOPE_API_KEY 的开通参考[文档](https://help.aliyun.com/zh/dashscope/opening-service)
+
+## 使用
+
+### 导入数据
+
+```bash
+python data_ingestion.py -p some.pdf  # load data from pdf
+
+    or
+
+python data_ingestion.py -p /opt/data/some_pdf_directory/ # load data from all pdf files under the directory {some_pdf_directory}
+```
+
+### 查询
+
+```bash
+python query.py --question '{the_question_you_want_to_ask}'
+```
+
+## 示例
+
+````bash
+# 启动 es 服务
+docker compose up -d
+
+or
+
+docker-compose up -d
+
+
+# 配置环境变量
+export ES_USER=elastic
+export ES_PASSWORD=llama_index
+export ES_URL=http://127.0.0.1:9200
+
+
+# 导入数据
+python data_ingestion.py -p example/data/declaration_of_the_rights_of_man_1789.pdf
+
+
+# 查询问题
+python query.py -q 'how about the rights of men'
+
+## outputs
+请基于```内的内容回答问题。"
+            ```
+            I. Men are born, and always continue, free and equal in respect of their rights. Civil distinctions, therefore, can be founded only on public utility.
+            ```
+            我的问题是:how about the rights of men。
+
+question: how about the rights of men
+answer: The statement implies that men are born free and equal in terms of their rights. Civil distinctions should only be based on public utility. However, it does not specify what those rights are. It is up to society and individual countries to determine and protect the specific rights of their citizens.
+
+````
+
+## 开发
+
+`MinerU` 提供了 `RAG` 集成接口,用户可以指定单个 `pdf` 文件或者某个目录作为输入。`MinerU` 会自动解析输入文件并返回可迭代的接口用于获取数据
+
+### API 接口
+
+```python
+from magic_pdf.integrations.rag.type import Node
+
+class RagPageReader:
+    def get_rel_map(self) -> list[ElementRelation]:
+        # 获取节点之间的关系
+        pass
+    ...
+
+class RagDocumentReader:
+    ...
+
+class DataReader:
+    def __init__(self, path_or_directory: str, method: str, output_dir: str):
+        pass
+
+    def get_documents_count(self) -> int:
+        """获取 pdf 文档数量"""
+        pass
+
+    def get_document_result(self, idx: int) -> RagDocumentReader | None:
+        """获取某个 pdf 的解析内容"""
+        pass
+
+
+    def get_document_filename(self, idx: int) -> Path:
+        """获取某个 pdf 的具体的路径"""
+        pass
+
+
+```
+
+类型定义
+
+```python
+
+class Node(BaseModel):
+    category_type: CategoryType = Field(description='类别') # 类别
+    text: str | None = Field(description='文本内容',
+                             default=None)
+    image_path: str | None = Field(description='图或者表格(表可能用图片形式存储)的存储路径',
+                                   default=None)
+    anno_id: int = Field(description='unique id', default=-1)
+    latex: str | None = Field(description='公式或表格 latex 解析结果', default=None)
+    html: str | None = Field(description='表格的 html 解析结果', default=None)
+
+```
+
+表格存储形式可能会是 图片、latex、html 三种形式之一。
+anno_id 是该 Node 的全局唯一 ID,后续可以用于匹配该 Node 和其他 Node 的关系。节点之间的关系可以通过方法 `get_rel_map` 获取。用户可以用 `anno_id` 匹配节点之间的关系,并用于构建包含节点关系的 rag index。
+
+### 节点类型关系矩阵
+
+|                | image_body | table_body |
+| -------------- | ---------- | ---------- |
+| image_caption  | sibling    |            |
+| table_caption  |            | sibling    |
+| table_footnote |            | sibling    |

+ 68 - 0
projects/llama_index_rag/data_ingestion.py

@@ -0,0 +1,68 @@
+import os
+
+import click
+from llama_index.core.schema import TextNode
+from llama_index.embeddings.dashscope import (DashScopeEmbedding,
+                                              DashScopeTextEmbeddingModels,
+                                              DashScopeTextEmbeddingType)
+from llama_index.vector_stores.elasticsearch import ElasticsearchStore
+
+from magic_pdf.integrations.rag.api import DataReader
+
+# Vector store for the `rag_index` index, created at import time. Connection
+# settings are read from ES_URL / ES_USER / ES_PASSWORD; the fallbacks match
+# the values used in this demo's docker-compose.yml and README.
+es_vec_store = ElasticsearchStore(
+    index_name='rag_index',
+    es_url=os.getenv('ES_URL', 'http://127.0.0.1:9200'),
+    es_user=os.getenv('ES_USER', 'elastic'),
+    es_password=os.getenv('ES_PASSWORD', 'llama_index'),
+)
+
+
+# Create the embedding of a node for indexing.
+# text_type=`document` is used because the text becomes part of the index.
+def embed_node(node):
+    """Store the DashScope embedding of ``node.text`` on the node.
+
+    Returns the same node object with its ``embedding`` attribute set.
+    """
+    node.embedding = DashScopeEmbedding(
+        model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
+        text_type=DashScopeTextEmbeddingType.TEXT_TYPE_DOCUMENT,
+    ).get_text_embedding(node.text)
+    return node
+
+
+@click.command()
+@click.option(
+    '-p',
+    '--path',
+    'path',
+    type=click.Path(exists=True),
+    required=True,
+    help='local pdf filepath or directory',
+)
+def cli(path):
+    # Parse the pdf(s) under `path`, embed every element that has text and
+    # add the resulting nodes to the vector store in a single call.
+    output_dir = '/tmp/magic_pdf/integrations/rag/'
+    os.makedirs(output_dir, exist_ok=True)
+    reader = DataReader(path, 'ocr', output_dir)
+
+    nodes = []
+    for doc_idx in range(reader.get_documents_count()):
+        parsed = reader.get_document_result(doc_idx)
+        if parsed is None:  # the pdf could not be parsed, skip it
+            continue
+
+        # Pages are visited first to last, elements in page order; elements
+        # without text (e.g. images) are not indexed.
+        nodes.extend(
+            embed_node(
+                TextNode(text=element.text, metadata={'purpose': 'demo'}))
+            for page in parsed for element in page
+            if element.text is not None)
+    es_vec_store.add(nodes)
+
+
+if __name__ == '__main__':
+    cli()

+ 24 - 0
projects/llama_index_rag/docker-compose.yml

@@ -0,0 +1,24 @@
+services:
+  es:
+    container_name: es
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3
+    volumes:
+      - esdata01:/usr/share/elasticsearch/data
+    ports:
+      - 9200:9200
+    environment:
+      - node.name=es
+      - ELASTIC_PASSWORD=llama_index
+      - bootstrap.memory_lock=false
+      - discovery.type=single-node
+      - xpack.security.enabled=true
+      - xpack.security.http.ssl.enabled=false
+      - xpack.security.transport.ssl.enabled=false
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    restart: always
+volumes:
+  esdata01:
+    driver: local

BIN
projects/llama_index_rag/example/data/declaration_of_the_rights_of_man_1789.pdf


+ 84 - 0
projects/llama_index_rag/query.py

@@ -0,0 +1,84 @@
+import os
+
+import click
+from llama_index.core.vector_stores.types import VectorStoreQuery
+from llama_index.embeddings.dashscope import (DashScopeEmbedding,
+                                              DashScopeTextEmbeddingModels,
+                                              DashScopeTextEmbeddingType)
+from llama_index.vector_stores.elasticsearch import (AsyncDenseVectorStrategy,
+                                                     ElasticsearchStore)
+# initialize qwen 7B model
+from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+
+# Vector store for the `rag_index` index, created at import time. Connection
+# settings are read from ES_URL / ES_USER / ES_PASSWORD; the fallbacks match
+# the values used in this demo's docker-compose.yml and README.
+es_vector_store = ElasticsearchStore(
+    index_name='rag_index',
+    es_url=os.getenv('ES_URL', 'http://127.0.0.1:9200'),
+    es_user=os.getenv('ES_USER', 'elastic'),
+    es_password=os.getenv('ES_PASSWORD', 'llama_index'),
+    retrieval_strategy=AsyncDenseVectorStrategy(),
+)
+
+
+def embed_text(text):
+    """Return the DashScope embedding of a search query.
+
+    The index is built with text_type=`document`; the text embedded here is
+    the user's question, so it is embedded with text_type=`query`.
+    """
+    embedder = DashScopeEmbedding(
+        model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
+        text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY,
+    )
+    return embedder.get_text_embedding(text)
+
+
+def search(vector_store: ElasticsearchStore, query: str):
+    """Query the vector store and return the hit texts, one per line."""
+    embedding = embed_text(query)
+    hits = vector_store.query(VectorStoreQuery(query_embedding=embedding))
+    return '\n'.join(hit.text for hit in hits.nodes)
+
+
+@click.command()
+@click.option(
+    '-q',
+    '--question',
+    'question',
+    required=True,
+    help='ask what you want to know!',
+)
+def cli(question):
+    # Load tokenizer, chat model and generation config of Qwen-7B-Chat.
+    tokenizer = AutoTokenizer.from_pretrained(
+        'qwen/Qwen-7B-Chat',
+        revision='v1.0.5',
+        trust_remote_code=True,
+    )
+    model = AutoModelForCausalLM.from_pretrained(
+        'qwen/Qwen-7B-Chat',
+        revision='v1.0.5',
+        device_map='auto',
+        trust_remote_code=True,
+        fp32=True,
+    ).eval()
+    model.generation_config = GenerationConfig.from_pretrained(
+        'Qwen/Qwen-7B-Chat', revision='v1.0.5', trust_remote_code=True)
+
+    # Retrieve context from the vector store and wrap it, together with the
+    # question, into the prompt; without context the bare question is sent.
+    context = search(es_vector_store, question)
+    if context == '':
+        prompt = question
+    else:
+        prompt = f'''请基于```内的内容回答问题。"
+            ```
+            {context}
+            ```
+            我的问题是:{question}。
+            '''
+    print(prompt)
+    # Single-turn chat: no history is passed and the returned one is unused.
+    answer, _ = model.chat(tokenizer, prompt, history=None)
+    print(f'question: {question}\n'
+          f'answer: {answer}')
+
+
+"""
+
+python query.py -q 'how about the rights of men'
+"""
+
+if __name__ == '__main__':
+    cli()

+ 1 - 1
requirements-docker.txt

@@ -15,4 +15,4 @@ paddleocr==2.7.3
 paddlepaddle==3.0.0b1
 pypandoc
 struct-eqtable==0.1.0
-detectron2
+detectron2

+ 5 - 4
requirements.txt

@@ -1,11 +1,12 @@
 boto3>=1.28.43
 Brotli>=1.1.0
 click>=8.1.7
-PyMuPDF>=1.24.9
+fast-langdetect==0.2.0
 loguru>=0.6.0
 numpy>=1.21.6,<2.0.0
-fast-langdetect==0.2.0
-wordninja>=2.0.0
-scikit-learn>=1.0.2
 pdfminer.six==20231228
+pydantic>=2.7.2,<2.8.0
+PyMuPDF>=1.24.9
+scikit-learn>=1.0.2
+wordninja>=2.0.0
 # The requirements.txt must ensure that only necessary external dependencies are introduced. If there are new dependencies to add, please contact the project administrator.

+ 2302 - 0
tests/test_integrations/test_rag/assets/middle.json

@@ -0,0 +1,2302 @@
+{
+    "pdf_info": [
+        {
+            "preproc_blocks": [
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        57,
+                        299,
+                        93
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                47,
+                                57,
+                                299,
+                                68
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        57,
+                                        298,
+                                        68
+                                    ],
+                                    "score": 0.98,
+                                    "content": "of the synthetic stereo scene from a single camera perspective",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                71,
+                                299,
+                                80
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        71,
+                                        299,
+                                        80
+                                    ],
+                                    "score": 0.96,
+                                    "content": "along with the ground truth disparity,occlusion map,and",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                82,
+                                123,
+                                93
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        82,
+                                        123,
+                                        93
+                                    ],
+                                    "score": 0.99,
+                                    "content": "discontinuitymap.",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        47,
+                        100,
+                        301,
+                        535
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                51,
+                                100,
+                                292,
+                                484
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        51,
+                                        100,
+                                        292,
+                                        484
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                51,
+                                                100,
+                                                292,
+                                                484
+                                            ],
+                                            "score": 0.9999815225601196,
+                                            "type": "image",
+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                488,
+                                301,
+                                535
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        490,
+                                        299,
+                                        499
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                49,
+                                                490,
+                                                299,
+                                                499
+                                            ],
+                                            "score": 1.0,
+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        501,
+                                        300,
+                                        512
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                501,
+                                                300,
+                                                512
+                                            ],
+                                            "score": 1.0,
+                                            "content": "quence (1st row), along with their corresponding ground truth",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        513,
+                                        299,
+                                        523
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                513,
+                                                299,
+                                                523
+                                            ],
+                                            "score": 0.98,
+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        525,
+                                        110,
+                                        535
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                525,
+                                                110,
+                                                535
+                                            ],
+                                            "score": 0.99,
+                                            "content": "map (4th row).",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        549,
+                        299,
+                        678
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                58,
+                                549,
+                                299,
+                                558
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        58,
+                                        549,
+                                        298,
+                                        558
+                                    ],
+                                    "score": 0.98,
+                                    "content": "Theresultsof temporalstereomatching aregiveninFigure",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                561,
+                                299,
+                                570
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        47,
+                                        561,
+                                        298,
+                                        570
+                                    ],
+                                    "score": 0.98,
+                                    "content": "3foruniformadditivenoiseconfinedtotherangesof±O",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                573,
+                                299,
+                                582
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        573,
+                                        299,
+                                        582
+                                    ],
+                                    "score": 0.96,
+                                    "content": "±20, and ±40. Each performance plot is given as a function",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                585,
+                                299,
+                                594
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        585,
+                                        299,
+                                        594
+                                    ],
+                                    "score": 0.95,
+                                    "content": "of the feedback coefficient X. As with the majority of temporal",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                597,
+                                299,
+                                606
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        597,
+                                        299,
+                                        606
+                                    ],
+                                    "score": 0.99,
+                                    "content": "stereomatching methods,improvements are negligible when",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                609,
+                                299,
+                                618
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        609,
+                                        299,
+                                        618
+                                    ],
+                                    "score": 0.97,
+                                    "content": "no noise is added to the images [1o], [19]. This is largely due",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                621,
+                                299,
+                                629
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        621,
+                                        299,
+                                        629
+                                    ],
+                                    "score": 1.0,
+                                    "content": "tothefactthatthevideousedtoevaluatethesemethodsis",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                633,
+                                299,
+                                641
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        633,
+                                        299,
+                                        641
+                                    ],
+                                    "score": 1.0,
+                                    "content": "computergeneratedwithverylittlenoisetostartwith,thus",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                644,
+                                299,
+                                654
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        644,
+                                        299,
+                                        654
+                                    ],
+                                    "score": 0.98,
+                                    "content": "the noise suppression achieved with temporal stereo matching",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                657,
+                                299,
+                                666
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        657,
+                                        299,
+                                        666
+                                    ],
+                                    "score": 0.98,
+                                    "content": "showslittletonoimprovementovermethodsthatoperate on",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                669,
+                                113,
+                                678
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        669,
+                                        113,
+                                        678
+                                    ],
+                                    "score": 1.0,
+                                    "content": "pairsofimages.",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        680,
+                        299,
+                        725
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                58,
+                                680,
+                                299,
+                                690
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        59,
+                                        680,
+                                        298,
+                                        690
+                                    ],
+                                    "score": 0.97,
+                                    "content": "Significantimprovementsin accuracy canbeseenin Figure",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                692,
+                                299,
+                                701
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        692,
+                                        298,
+                                        701
+                                    ],
+                                    "score": 0.97,
+                                    "content": "3 when the noise has ranges of ±20, and ±40.In this scenario",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                703,
+                                299,
+                                714
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        703,
+                                        299,
+                                        714
+                                    ],
+                                    "score": 0.98,
+                                    "content": "the effect of noise in the current frame is reduced by increasing",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                716,
+                                299,
+                                725
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        716,
+                                        299,
+                                        725
+                                    ],
+                                    "score": 0.96,
+                                    "content": "thefeedbackcoefficientX.Thisincreasing ofXhas theeffect",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        55,
+                        564,
+                        371
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                55,
+                                538,
+                                305
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        55,
+                                        538,
+                                        305
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                55,
+                                                538,
+                                                305
+                                            ],
+                                            "score": 0.9999905824661255,
+                                            "type": "image",
+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                311,
+                                564,
+                                371
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        313,
+                                        562,
+                                        322
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                313,
+                                                562,
+                                                322
+                                            ],
+                                            "score": 0.97,
+                                            "content": "Figure 3: Performance of temporal matching at different levels",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        325,
+                                        561,
+                                        334
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                325,
+                                                561,
+                                                334
+                                            ],
+                                            "score": 0.98,
+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        336,
+                                        563,
+                                        347
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                336,
+                                                563,
+                                                347
+                                            ],
+                                            "score": 0.99,
+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        348,
+                                        561,
+                                        358
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                348,
+                                                561,
+                                                358
+                                            ],
+                                            "score": 0.96,
+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        360,
+                                        535,
+                                        371
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                360,
+                                                535,
+                                                371
+                                            ],
+                                            "score": 0.96,
+                                            "content": "values of MSE obtained without temporal aggregation.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        418,
+                        563,
+                        666
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                418,
+                                549,
+                                623
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        418,
+                                        549,
+                                        623
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                418,
+                                                549,
+                                                623
+                                            ],
+                                            "score": 0.9999067783355713,
+                                            "type": "image",
+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                630,
+                                563,
+                                666
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        631,
+                                        562,
+                                        641
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                631,
+                                                562,
+                                                641
+                                            ],
+                                            "score": 0.94,
+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        644,
+                                        561,
+                                        652
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                644,
+                                                561,
+                                                652
+                                            ],
+                                            "score": 0.97,
+                                            "content": "responding to the smallest mean squared error (MSE)of the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        655,
+                                        513,
+                                        665
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                655,
+                                                513,
+                                                665
+                                            ],
+                                            "score": 0.97,
+                                            "content": "disparity estimates for a range of noise strengths.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        311,
+                        692,
+                        563,
+                        725
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                311,
+                                692,
+                                563,
+                                702
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        692,
+                                        562,
+                                        702
+                                    ],
+                                    "score": 0.95,
+                                    "content": "of averaging out noise in the per-pixel costs by selecting",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                311,
+                                704,
+                                563,
+                                713
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        704,
+                                        562,
+                                        713
+                                    ],
+                                    "score": 0.98,
+                                    "content": "matches based more heavily upon the auxiliary cost, which",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                311,
+                                716,
+                                563,
+                                725
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        716,
+                                        563,
+                                        725
+                                    ],
+                                    "score": 0.97,
+                                    "content": "is essentially a much more stable running average of the cost",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "layout_bboxes": [
+                {
+                    "layout_bbox": [
+                        47,
+                        55,
+                        301,
+                        726
+                    ],
+                    "layout_label": "V",
+                    "sub_layout": []
+                },
+                {
+                    "layout_bbox": [
+                        310,
+                        55,
+                        564,
+                        726
+                    ],
+                    "layout_label": "V",
+                    "sub_layout": []
+                }
+            ],
+            "page_idx": 0,
+            "page_size": [
+                612.0,
+                792.0
+            ],
+            "_layout_tree": [
+                {
+                    "layout_bbox": [
+                        0,
+                        55,
+                        612.0,
+                        726
+                    ],
+                    "layout_label": "V",
+                    "sub_layout": [
+                        {
+                            "layout_bbox": [
+                                47,
+                                55,
+                                564,
+                                726
+                            ],
+                            "layout_label": "H",
+                            "sub_layout": [
+                                {
+                                    "layout_bbox": [
+                                        47,
+                                        55,
+                                        301,
+                                        726
+                                    ],
+                                    "layout_label": "V",
+                                    "sub_layout": []
+                                },
+                                {
+                                    "layout_bbox": [
+                                        310,
+                                        55,
+                                        564,
+                                        726
+                                    ],
+                                    "layout_label": "V",
+                                    "sub_layout": []
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "images": [
+                {
+                    "type": "image",
+                    "bbox": [
+                        47,
+                        100,
+                        301,
+                        535
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                51,
+                                100,
+                                292,
+                                484
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        51,
+                                        100,
+                                        292,
+                                        484
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                51,
+                                                100,
+                                                292,
+                                                484
+                                            ],
+                                            "score": 0.9999815225601196,
+                                            "type": "image",
+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                488,
+                                301,
+                                535
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        490,
+                                        299,
+                                        499
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                49,
+                                                490,
+                                                299,
+                                                499
+                                            ],
+                                            "score": 1.0,
+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        501,
+                                        300,
+                                        512
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                501,
+                                                300,
+                                                512
+                                            ],
+                                            "score": 1.0,
+                                            "content": "quence (1st row), along with their corresponding ground truth",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        513,
+                                        299,
+                                        523
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                513,
+                                                299,
+                                                523
+                                            ],
+                                            "score": 0.98,
+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        525,
+                                        110,
+                                        535
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                525,
+                                                110,
+                                                535
+                                            ],
+                                            "score": 0.99,
+                                            "content": "map (4th row).",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        55,
+                        564,
+                        371
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                55,
+                                538,
+                                305
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        55,
+                                        538,
+                                        305
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                55,
+                                                538,
+                                                305
+                                            ],
+                                            "score": 0.9999905824661255,
+                                            "type": "image",
+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                311,
+                                564,
+                                371
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        313,
+                                        562,
+                                        322
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                313,
+                                                562,
+                                                322
+                                            ],
+                                            "score": 0.97,
+                                            "content": "Figure 3: Performance of temporal matching at different levels",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        325,
+                                        561,
+                                        334
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                325,
+                                                561,
+                                                334
+                                            ],
+                                            "score": 0.98,
+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        336,
+                                        563,
+                                        347
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                336,
+                                                563,
+                                                347
+                                            ],
+                                            "score": 0.99,
+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        348,
+                                        561,
+                                        358
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                348,
+                                                561,
+                                                358
+                                            ],
+                                            "score": 0.96,
+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        360,
+                                        535,
+                                        371
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                360,
+                                                535,
+                                                371
+                                            ],
+                                            "score": 0.96,
+                                            "content": "values of MSE obtained without temporal aggregation.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        418,
+                        563,
+                        666
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                418,
+                                549,
+                                623
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        418,
+                                        549,
+                                        623
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                418,
+                                                549,
+                                                623
+                                            ],
+                                            "score": 0.9999067783355713,
+                                            "type": "image",
+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                630,
+                                563,
+                                666
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        631,
+                                        562,
+                                        641
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                631,
+                                                562,
+                                                641
+                                            ],
+                                            "score": 0.94,
+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        644,
+                                        561,
+                                        652
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                644,
+                                                561,
+                                                652
+                                            ],
+                                            "score": 0.97,
+                                            "content": "responding to the smallest mean squared error (MSE)of the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        655,
+                                        513,
+                                        665
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                655,
+                                                513,
+                                                665
+                                            ],
+                                            "score": 0.97,
+                                            "content": "disparity estimates for a range of noise strengths.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "tables": [],
+            "interline_equations": [],
+            "discarded_blocks": [],
+            "need_drop": false,
+            "drop_reason": [],
+            "para_blocks": [
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        57,
+                        299,
+                        93
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                47,
+                                57,
+                                299,
+                                68
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        57,
+                                        298,
+                                        68
+                                    ],
+                                    "score": 0.98,
+                                    "content": "of the synthetic stereo scene from a single camera perspective",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                71,
+                                299,
+                                80
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        71,
+                                        299,
+                                        80
+                                    ],
+                                    "score": 0.96,
+                                    "content": "along with the ground truth disparity,occlusion map,and",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                82,
+                                123,
+                                93
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        82,
+                                        123,
+                                        93
+                                    ],
+                                    "score": 0.99,
+                                    "content": "discontinuitymap.",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        47,
+                        100,
+                        301,
+                        535
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                51,
+                                100,
+                                292,
+                                484
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        51,
+                                        100,
+                                        292,
+                                        484
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                51,
+                                                100,
+                                                292,
+                                                484
+                                            ],
+                                            "score": 0.9999815225601196,
+                                            "type": "image",
+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                488,
+                                301,
+                                535
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        490,
+                                        299,
+                                        499
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                49,
+                                                490,
+                                                299,
+                                                499
+                                            ],
+                                            "score": 1.0,
+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        501,
+                                        300,
+                                        512
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                501,
+                                                300,
+                                                512
+                                            ],
+                                            "score": 1.0,
+                                            "content": "quence (1st row), along with their corresponding ground truth",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        513,
+                                        299,
+                                        523
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                513,
+                                                299,
+                                                523
+                                            ],
+                                            "score": 0.98,
+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        525,
+                                        110,
+                                        535
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                525,
+                                                110,
+                                                535
+                                            ],
+                                            "score": 0.99,
+                                            "content": "map (4th row).",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        549,
+                        299,
+                        678
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                58,
+                                549,
+                                299,
+                                558
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        58,
+                                        549,
+                                        298,
+                                        558
+                                    ],
+                                    "score": 0.98,
+                                    "content": "Theresultsof temporalstereomatching aregiveninFigure",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                561,
+                                299,
+                                570
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        47,
+                                        561,
+                                        298,
+                                        570
+                                    ],
+                                    "score": 0.98,
+                                    "content": "3foruniformadditivenoiseconfinedtotherangesof±O",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                573,
+                                299,
+                                582
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        573,
+                                        299,
+                                        582
+                                    ],
+                                    "score": 0.96,
+                                    "content": "±20, and ±40. Each performance plot is given as a function",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                585,
+                                299,
+                                594
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        585,
+                                        299,
+                                        594
+                                    ],
+                                    "score": 0.95,
+                                    "content": "of the feedback coefficient X. As with the majority of temporal",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                597,
+                                299,
+                                606
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        597,
+                                        299,
+                                        606
+                                    ],
+                                    "score": 0.99,
+                                    "content": "stereomatching methods,improvements are negligible when",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                609,
+                                299,
+                                618
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        609,
+                                        299,
+                                        618
+                                    ],
+                                    "score": 0.97,
+                                    "content": "no noise is added to the images [1o], [19]. This is largely due",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                621,
+                                299,
+                                629
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        621,
+                                        299,
+                                        629
+                                    ],
+                                    "score": 1.0,
+                                    "content": "tothefactthatthevideousedtoevaluatethesemethodsis",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                633,
+                                299,
+                                641
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        633,
+                                        299,
+                                        641
+                                    ],
+                                    "score": 1.0,
+                                    "content": "computergeneratedwithverylittlenoisetostartwith,thus",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                644,
+                                299,
+                                654
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        644,
+                                        299,
+                                        654
+                                    ],
+                                    "score": 0.98,
+                                    "content": "the noise suppression achieved with temporal stereo matching",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                657,
+                                299,
+                                666
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        657,
+                                        299,
+                                        666
+                                    ],
+                                    "score": 0.98,
+                                    "content": "showslittletonoimprovementovermethodsthatoperate on",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                669,
+                                113,
+                                678
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        669,
+                                        113,
+                                        678
+                                    ],
+                                    "score": 1.0,
+                                    "content": "pairsofimages.",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        680,
+                        299,
+                        725
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                58,
+                                680,
+                                299,
+                                690
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        59,
+                                        680,
+                                        298,
+                                        690
+                                    ],
+                                    "score": 0.97,
+                                    "content": "Significantimprovementsin accuracy canbeseenin Figure",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                692,
+                                299,
+                                701
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        692,
+                                        298,
+                                        701
+                                    ],
+                                    "score": 0.97,
+                                    "content": "3 when the noise has ranges of ±20, and ±40.In this scenario",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                703,
+                                299,
+                                714
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        703,
+                                        299,
+                                        714
+                                    ],
+                                    "score": 0.98,
+                                    "content": "the effect of noise in the current frame is reduced by increasing",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                716,
+                                299,
+                                725
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        716,
+                                        299,
+                                        725
+                                    ],
+                                    "score": 0.96,
+                                    "content": "thefeedbackcoefficientX.Thisincreasing ofXhas theeffect",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        55,
+                        564,
+                        371
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                55,
+                                538,
+                                305
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        55,
+                                        538,
+                                        305
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                55,
+                                                538,
+                                                305
+                                            ],
+                                            "score": 0.9999905824661255,
+                                            "type": "image",
+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                311,
+                                564,
+                                371
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        313,
+                                        562,
+                                        322
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                313,
+                                                562,
+                                                322
+                                            ],
+                                            "score": 0.97,
+                                            "content": "Figure 3: Performance of temporal matching at different levels",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        325,
+                                        561,
+                                        334
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                325,
+                                                561,
+                                                334
+                                            ],
+                                            "score": 0.98,
+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        336,
+                                        563,
+                                        347
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                336,
+                                                563,
+                                                347
+                                            ],
+                                            "score": 0.99,
+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        348,
+                                        561,
+                                        358
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                348,
+                                                561,
+                                                358
+                                            ],
+                                            "score": 0.96,
+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        360,
+                                        535,
+                                        371
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                360,
+                                                535,
+                                                371
+                                            ],
+                                            "score": 0.96,
+                                            "content": "values of MSE obtained without temporal aggregation.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        418,
+                        563,
+                        666
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                418,
+                                549,
+                                623
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        418,
+                                        549,
+                                        623
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                418,
+                                                549,
+                                                623
+                                            ],
+                                            "score": 0.9999067783355713,
+                                            "type": "image",
+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                630,
+                                563,
+                                666
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        631,
+                                        562,
+                                        641
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                631,
+                                                562,
+                                                641
+                                            ],
+                                            "score": 0.94,
+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        644,
+                                        561,
+                                        652
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                644,
+                                                561,
+                                                652
+                                            ],
+                                            "score": 0.97,
+                                            "content": "responding to the smallest mean squared error (MSE)of the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        655,
+                                        513,
+                                        665
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                655,
+                                                513,
+                                                665
+                                            ],
+                                            "score": 0.97,
+                                            "content": "disparity estimates for a range of noise strengths.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        311,
+                        692,
+                        563,
+                        725
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                311,
+                                692,
+                                563,
+                                702
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        692,
+                                        562,
+                                        702
+                                    ],
+                                    "score": 0.95,
+                                    "content": "of averaging out noise in the per-pixel costs by selecting",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                311,
+                                704,
+                                563,
+                                713
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        704,
+                                        562,
+                                        713
+                                    ],
+                                    "score": 0.98,
+                                    "content": "matches based more heavily upon the auxiliary cost, which",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                311,
+                                716,
+                                563,
+                                725
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        716,
+                                        563,
+                                        725
+                                    ],
+                                    "score": 0.97,
+                                    "content": "is essentially a much more stable running average of the cost",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+    ],
+    "_parse_type": "ocr",
+    "_version_name": "0.7.0b1"
+}

BIN
tests/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf


BIN
tests/test_integrations/test_rag/assets/one_page_with_table_image.pdf


+ 55 - 0
tests/test_integrations/test_rag/test_api.py

@@ -0,0 +1,55 @@
+import json
+import os
+import shutil
+import tempfile
+
+from magic_pdf.integrations.rag.api import DataReader, RagDocumentReader
+from magic_pdf.integrations.rag.type import CategoryType
+from magic_pdf.integrations.rag.utils import \
+    convert_middle_json_to_layout_elements
+
+
+def test_rag_document_reader():
+    # setup
+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
+    os.makedirs(unitest_dir, exist_ok=True)
+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
+    os.makedirs(temp_output_dir, exist_ok=True)
+
+    # test
+    with open('tests/test_integrations/test_rag/assets/middle.json') as f:
+        json_data = json.load(f)
+    res = convert_middle_json_to_layout_elements(json_data, temp_output_dir)
+
+    doc = RagDocumentReader(res)
+    assert len(list(iter(doc))) == 1
+
+    page = list(iter(doc))[0]
+    assert len(list(iter(page))) == 10
+    assert len(page.get_rel_map()) == 3
+
+    item = list(iter(page))[0]
+    assert item.category_type == CategoryType.text
+
+    # teardown
+    shutil.rmtree(temp_output_dir)
+
+
+def test_data_reader():
+    # setup
+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
+    os.makedirs(unitest_dir, exist_ok=True)
+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
+    os.makedirs(temp_output_dir, exist_ok=True)
+
+    # test
+    data_reader = DataReader('tests/test_integrations/test_rag/assets', 'ocr',
+                             temp_output_dir)
+
+    assert data_reader.get_documents_count() == 2
+    for idx in range(data_reader.get_documents_count()):
+        document = data_reader.get_document_result(idx)
+        assert document is not None
+
+    # teardown
+    shutil.rmtree(temp_output_dir)

+ 57 - 0
tests/test_integrations/test_rag/test_utils.py

@@ -0,0 +1,57 @@
+import json
+import os
+import shutil
+import tempfile
+
+from magic_pdf.integrations.rag.type import CategoryType
+from magic_pdf.integrations.rag.utils import (
+    convert_middle_json_to_layout_elements, inference)
+
+
+def test_convert_middle_json_to_layout_elements():
+    # setup
+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
+    os.makedirs(unitest_dir, exist_ok=True)
+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
+    os.makedirs(temp_output_dir, exist_ok=True)
+
+    # test
+    with open('tests/test_integrations/test_rag/assets/middle.json') as f:
+        json_data = json.load(f)
+    res = convert_middle_json_to_layout_elements(json_data, temp_output_dir)
+
+    assert len(res) == 1
+    assert len(res[0].layout_dets) == 10
+    assert res[0].layout_dets[0].anno_id == 0
+    assert res[0].layout_dets[0].category_type == CategoryType.text
+    assert len(res[0].extra.element_relation) == 3
+
+    # teardown
+    shutil.rmtree(temp_output_dir)
+
+
+def test_inference():
+
+    asset_dir = 'tests/test_integrations/test_rag/assets'
+    # setup
+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
+    os.makedirs(unitest_dir, exist_ok=True)
+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
+    os.makedirs(temp_output_dir, exist_ok=True)
+
+    # test
+    res = inference(
+        asset_dir + '/one_page_with_table_image.pdf',
+        temp_output_dir,
+        'ocr',
+    )
+
+    assert res is not None
+    assert len(res) == 1
+    assert len(res[0].layout_dets) == 10
+    assert res[0].layout_dets[0].anno_id == 0
+    assert res[0].layout_dets[0].category_type == CategoryType.text
+    assert len(res[0].extra.element_relation) == 3
+
+    # teardown
+    shutil.rmtree(temp_output_dir)

+ 6 - 1
tests/test_tools/test_common.py

@@ -19,7 +19,12 @@ def test_common_do_parse(method):
     # run
     with open("tests/test_tools/assets/common/cli_test_01.pdf", "rb") as f:
         bits = f.read()
-    do_parse(temp_output_dir, filename, bits, [], method, f_dump_content_list=True)
+    do_parse(temp_output_dir,
+             filename,
+             bits, [],
+             method,
+             False,
+             f_dump_content_list=True)
 
     # check
     base_output_dir = os.path.join(temp_output_dir, f"fake/{method}")