před 1 rokem · b72d4ebd94
--- a/magic_pdf/integrations/__init__.py
+++ b/magic_pdf/integrations/__init__.py
--- a/magic_pdf/integrations/rag/__init__.py
+++ b/magic_pdf/integrations/rag/__init__.py
--- a/magic_pdf/integrations/rag/api.py
+++ b/magic_pdf/integrations/rag/api.py
@@ -0,0 +1,82 @@
 
				+import os
			
 
				+from pathlib import Path
			
 
				+
			
 
				+from loguru import logger
			
 
				+
			
 
				+from magic_pdf.integrations.rag.type import (ElementRelation, LayoutElements,
			
 
				+                                             Node)
			
 
				+from magic_pdf.integrations.rag.utils import inference
			
 
				+
			
 
				+
			
 
				+class RagPageReader:
			
 
				+
			
 
				+    def __init__(self, pagedata: LayoutElements):
			
 
				+        self.o = [
			
 
				+            Node(
			
 
				+                category_type=v.category_type,
			
 
				+                text=v.text,
			
 
				+                image_path=v.image_path,
			
 
				+                anno_id=v.anno_id,
			
 
				+                latex=v.latex,
			
 
				+                html=v.html,
			
 
				+            ) for v in pagedata.layout_dets
			
 
				+        ]
			
 
				+
			
 
				+        self.pagedata = pagedata
			
 
				+
			
 
				+    def __iter__(self):
			
 
				+        return iter(self.o)
			
 
				+
			
 
				+    def get_rel_map(self) -> list[ElementRelation]:
			
 
				+        return self.pagedata.extra.element_relation
			
 
				+
			
 
				+
			
 
				+class RagDocumentReader:
			
 
				+
			
 
				+    def __init__(self, ragdata: list[LayoutElements]):
			
 
				+        self.o = [RagPageReader(v) for v in ragdata]
			
 
				+
			
 
				+    def __iter__(self):
			
 
				+        return iter(self.o)
			
 
				+
			
 
				+
			
 
				+class DataReader:
			
 
				+
			
 
				+    def __init__(self, path_or_directory: str, method: str, output_dir: str):
			
 
				+        self.path_or_directory = path_or_directory
			
 
				+        self.method = method
			
 
				+        self.output_dir = output_dir
			
 
				+        self.pdfs = []
			
 
				+        if os.path.isdir(path_or_directory):
			
 
				+            for doc_path in Path(path_or_directory).glob('*.pdf'):
			
 
				+                self.pdfs.append(doc_path)
			
 
				+        else:
			
 
				+            assert path_or_directory.endswith('.pdf')
			
 
				+            self.pdfs.append(Path(path_or_directory))
			
 
				+
			
 
				+    def get_documents_count(self) -> int:
			
 
				+        """Returns the number of documents in the directory."""
			
 
				+        return len(self.pdfs)
			
 
				+
			
 
				+    def get_document_result(self, idx: int) -> RagDocumentReader | None:
			
 
				+        """
			
 
				+        Args:
			
 
				+            idx (int): the index of documents under the
			
 
				+                directory path_or_directory
			
 
				+
			
 
				+        Returns:
			
 
				+            RagDocumentReader | None: RagDocumentReader is an iterable object,
			
 
				+            more details @RagDocumentReader
			
 
				+        """
			
 
				+        if idx >= self.get_documents_count() or idx < 0:
			
 
				+            logger.error(f'invalid idx: {idx}')
			
 
				+            return None
			
 
				+        res = inference(str(self.pdfs[idx]), self.output_dir, self.method)
			
 
				+        if res is None:
			
 
				+            logger.warning(f'failed to inference pdf {self.pdfs[idx]}')
			
 
				+            return None
			
 
				+        return RagDocumentReader(res)
			
 
				+
			
 
				+    def get_document_filename(self, idx: int) -> Path:
			
 
				+        """get the filename of the document."""
			
 
				+        return self.pdfs[idx]
			
--- a/magic_pdf/integrations/rag/type.py
+++ b/magic_pdf/integrations/rag/type.py
@@ -0,0 +1,82 @@
 
				+from enum import Enum
			
 
				+
			
 
				+from pydantic import BaseModel, Field
			
 
				+
			
 
				+
			
 
				+# rag
			
 
				+class CategoryType(Enum):  # py310 not support StrEnum
			
 
				+    text = 'text'
			
 
				+    title = 'title'
			
 
				+    interline_equation = 'interline_equation'
			
 
				+    image = 'image'
			
 
				+    image_body = 'image_body'
			
 
				+    image_caption = 'image_caption'
			
 
				+    table = 'table'
			
 
				+    table_body = 'table_body'
			
 
				+    table_caption = 'table_caption'
			
 
				+    table_footnote = 'table_footnote'
			
 
				+
			
 
				+
			
 
				+class ElementRelType(Enum):
			
 
				+    sibling = 'sibling'
			
 
				+
			
 
				+
			
 
				+class PageInfo(BaseModel):
			
 
				+    page_no: int = Field(description='the index of page, start from zero',
			
 
				+                         ge=0)
			
 
				+    height: int = Field(description='the height of page', gt=0)
			
 
				+    width: int = Field(description='the width of page', ge=0)
			
 
				+    image_path: str | None = Field(description='the image of this page',
			
 
				+                                   default=None)
			
 
				+
			
 
				+
			
 
				+class ContentObject(BaseModel):
			
 
				+    category_type: CategoryType = Field(description='类别')
			
 
				+    poly: list[float] = Field(
			
 
				+        description=('Coordinates, need to convert back to PDF coordinates,'
			
 
				+                     ' order is top-left, top-right, bottom-right, bottom-left'
			
 
				+                     ' x,y coordinates'))
			
 
				+    ignore: bool = Field(description='whether ignore this object',
			
 
				+                         default=False)
			
 
				+    text: str | None = Field(description='text content of the object',
			
 
				+                             default=None)
			
 
				+    image_path: str | None = Field(description='path of embedded image',
			
 
				+                                   default=None)
			
 
				+    order: int = Field(description='the order of this object within a page',
			
 
				+                       default=-1)
			
 
				+    anno_id: int = Field(description='unique id', default=-1)
			
 
				+    latex: str | None = Field(description='latex result', default=None)
			
 
				+    html: str | None = Field(description='html result', default=None)
			
 
				+
			
 
				+
			
 
				+class ElementRelation(BaseModel):
			
 
				+    source_anno_id: int = Field(description='unique id of the source object',
			
 
				+                                default=-1)
			
 
				+    target_anno_id: int = Field(description='unique id of the target object',
			
 
				+                                default=-1)
			
 
				+    relation: ElementRelType = Field(
			
 
				+        description='the relation between source and target element')
			
 
				+
			
 
				+
			
 
				+class LayoutElementsExtra(BaseModel):
			
 
				+    element_relation: list[ElementRelation] = Field(
			
 
				+        description='the relation between source and target element')
			
 
				+
			
 
				+
			
 
				+class LayoutElements(BaseModel):
			
 
				+    layout_dets: list[ContentObject] = Field(
			
 
				+        description='layout element details')
			
 
				+    page_info: PageInfo = Field(description='page info')
			
 
				+    extra: LayoutElementsExtra = Field(description='extra information')
			
 
				+
			
 
				+
			
 
				+# iter data format
			
 
				+class Node(BaseModel):
			
 
				+    category_type: CategoryType = Field(description='类别')
			
 
				+    text: str | None = Field(description='text content of the object',
			
 
				+                             default=None)
			
 
				+    image_path: str | None = Field(description='path of embedded image',
			
 
				+                                   default=None)
			
 
				+    anno_id: int = Field(description='unique id', default=-1)
			
 
				+    latex: str | None = Field(description='latex result', default=None)
			
 
				+    html: str | None = Field(description='html result', default=None)
			
--- a/magic_pdf/integrations/rag/utils.py
+++ b/magic_pdf/integrations/rag/utils.py
@@ -0,0 +1,285 @@
 
				+import json
			
 
				+import os
			
 
				+from pathlib import Path
			
 
				+
			
 
				+from loguru import logger
			
 
				+
			
 
				+import magic_pdf.model as model_config
			
 
				+from magic_pdf.dict2md.ocr_mkcontent import merge_para_with_text
			
 
				+from magic_pdf.integrations.rag.type import (CategoryType, ContentObject,
			
 
				+                                             ElementRelation, ElementRelType,
			
 
				+                                             LayoutElements,
			
 
				+                                             LayoutElementsExtra, PageInfo)
			
 
				+from magic_pdf.libs.ocr_content_type import BlockType, ContentType
			
 
				+from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
			
 
				+from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
			
 
				+from magic_pdf.tools.common import do_parse, prepare_env
			
 
				+
			
 
				+
			
 
				+def convert_middle_json_to_layout_elements(
			
 
				+    json_data: dict,
			
 
				+    output_dir: str,
			
 
				+) -> list[LayoutElements]:
			
 
				+    uniq_anno_id = 0
			
 
				+
			
 
				+    res: list[LayoutElements] = []
			
 
				+    for page_no, page_data in enumerate(json_data['pdf_info']):
			
 
				+        order_id = 0
			
 
				+        page_info = PageInfo(
			
 
				+            height=int(page_data['page_size'][1]),
			
 
				+            width=int(page_data['page_size'][0]),
			
 
				+            page_no=page_no,
			
 
				+        )
			
 
				+        layout_dets: list[ContentObject] = []
			
 
				+        extra_element_relation: list[ElementRelation] = []
			
 
				+
			
 
				+        for para_block in page_data['para_blocks']:
			
 
				+            para_text = ''
			
 
				+            para_type = para_block['type']
			
 
				+
			
 
				+            if para_type == BlockType.Text:
			
 
				+                para_text = merge_para_with_text(para_block)
			
 
				+                x0, y0, x1, y1 = para_block['bbox']
			
 
				+                content = ContentObject(
			
 
				+                    anno_id=uniq_anno_id,
			
 
				+                    category_type=CategoryType.text,
			
 
				+                    text=para_text,
			
 
				+                    order=order_id,
			
 
				+                    poly=[x0, y0, x1, y0, x1, y1, x0, y1],
			
 
				+                )
			
 
				+                uniq_anno_id += 1
			
 
				+                order_id += 1
			
 
				+                layout_dets.append(content)
			
 
				+
			
 
				+            elif para_type == BlockType.Title:
			
 
				+                para_text = merge_para_with_text(para_block)
			
 
				+                x0, y0, x1, y1 = para_block['bbox']
			
 
				+                content = ContentObject(
			
 
				+                    anno_id=uniq_anno_id,
			
 
				+                    category_type=CategoryType.title,
			
 
				+                    text=para_text,
			
 
				+                    order=order_id,
			
 
				+                    poly=[x0, y0, x1, y0, x1, y1, x0, y1],
			
 
				+                )
			
 
				+                uniq_anno_id += 1
			
 
				+                order_id += 1
			
 
				+                layout_dets.append(content)
			
 
				+
			
 
				+            elif para_type == BlockType.InterlineEquation:
			
 
				+                para_text = merge_para_with_text(para_block)
			
 
				+                x0, y0, x1, y1 = para_block['bbox']
			
 
				+                content = ContentObject(
			
 
				+                    anno_id=uniq_anno_id,
			
 
				+                    category_type=CategoryType.interline_equation,
			
 
				+                    text=para_text,
			
 
				+                    order=order_id,
			
 
				+                    poly=[x0, y0, x1, y0, x1, y1, x0, y1],
			
 
				+                )
			
 
				+                uniq_anno_id += 1
			
 
				+                order_id += 1
			
 
				+                layout_dets.append(content)
			
 
				+
			
 
				+            elif para_type == BlockType.Image:
			
 
				+                body_anno_id = -1
			
 
				+                caption_anno_id = -1
			
 
				+
			
 
				+                for block in para_block['blocks']:
			
 
				+                    if block['type'] == BlockType.ImageBody:
			
 
				+                        for line in block['lines']:
			
 
				+                            for span in line['spans']:
			
 
				+                                if span['type'] == ContentType.Image:
			
 
				+                                    x0, y0, x1, y1 = block['bbox']
			
 
				+                                    content = ContentObject(
			
 
				+                                        anno_id=uniq_anno_id,
			
 
				+                                        category_type=CategoryType.image_body,
			
 
				+                                        image_path=os.path.join(
			
 
				+                                            output_dir, span['image_path']),
			
 
				+                                        order=order_id,
			
 
				+                                        poly=[x0, y0, x1, y0, x1, y1, x0, y1],
			
 
				+                                    )
			
 
				+                                    body_anno_id = uniq_anno_id
			
 
				+                                    uniq_anno_id += 1
			
 
				+                                    order_id += 1
			
 
				+                                    layout_dets.append(content)
			
 
				+
			
 
				+                for block in para_block['blocks']:
			
 
				+                    if block['type'] == BlockType.ImageCaption:
			
 
				+                        para_text += merge_para_with_text(block)
			
 
				+                        x0, y0, x1, y1 = block['bbox']
			
 
				+                        content = ContentObject(
			
 
				+                            anno_id=uniq_anno_id,
			
 
				+                            category_type=CategoryType.image_caption,
			
 
				+                            text=para_text,
			
 
				+                            order=order_id,
			
 
				+                            poly=[x0, y0, x1, y0, x1, y1, x0, y1],
			
 
				+                        )
			
 
				+                        caption_anno_id = uniq_anno_id
			
 
				+                        uniq_anno_id += 1
			
 
				+                        order_id += 1
			
 
				+                        layout_dets.append(content)
			
 
				+
			
 
				+                if body_anno_id > 0 and caption_anno_id > 0:
			
 
				+                    element_relation = ElementRelation(
			
 
				+                        relation=ElementRelType.sibling,
			
 
				+                        source_anno_id=body_anno_id,
			
 
				+                        target_anno_id=caption_anno_id,
			
 
				+                    )
			
 
				+                    extra_element_relation.append(element_relation)
			
 
				+
			
 
				+            elif para_type == BlockType.Table:
			
 
				+                body_anno_id, caption_anno_id, footnote_anno_id = -1, -1, -1
			
 
				+
			
 
				+                for block in para_block['blocks']:
			
 
				+                    if block['type'] == BlockType.TableCaption:
			
 
				+                        para_text += merge_para_with_text(block)
			
 
				+                        x0, y0, x1, y1 = block['bbox']
			
 
				+                        content = ContentObject(
			
 
				+                            anno_id=uniq_anno_id,
			
 
				+                            category_type=CategoryType.table_caption,
			
 
				+                            text=para_text,
			
 
				+                            order=order_id,
			
 
				+                            poly=[x0, y0, x1, y0, x1, y1, x0, y1],
			
 
				+                        )
			
 
				+                        caption_anno_id = uniq_anno_id
			
 
				+                        uniq_anno_id += 1
			
 
				+                        order_id += 1
			
 
				+                        layout_dets.append(content)
			
 
				+
			
 
				+                for block in para_block['blocks']:
			
 
				+                    if block['type'] == BlockType.TableBody:
			
 
				+                        for line in block['lines']:
			
 
				+                            for span in line['spans']:
			
 
				+                                if span['type'] == ContentType.Table:
			
 
				+                                    x0, y0, x1, y1 = para_block['bbox']
			
 
				+                                    content = ContentObject(
			
 
				+                                        anno_id=uniq_anno_id,
			
 
				+                                        category_type=CategoryType.table_body,
			
 
				+                                        order=order_id,
			
 
				+                                        poly=[x0, y0, x1, y0, x1, y1, x0, y1],
			
 
				+                                    )
			
 
				+                                    body_anno_id = uniq_anno_id
			
 
				+                                    uniq_anno_id += 1
			
 
				+                                    order_id += 1
			
 
				+                                    # if processed by table model
			
 
				+                                    if span.get('latex', ''):
			
 
				+                                        content.latex = span['latex']
			
 
				+                                    else:
			
 
				+                                        content.image_path = os.path.join(
			
 
				+                                            output_dir, span['image_path'])
			
 
				+                                    layout_dets.append(content)
			
 
				+
			
 
				+                for block in para_block['blocks']:
			
 
				+                    if block['type'] == BlockType.TableFootnote:
			
 
				+                        para_text += merge_para_with_text(block)
			
 
				+                        x0, y0, x1, y1 = block['bbox']
			
 
				+                        content = ContentObject(
			
 
				+                            anno_id=uniq_anno_id,
			
 
				+                            category_type=CategoryType.table_footnote,
			
 
				+                            text=para_text,
			
 
				+                            order=order_id,
			
 
				+                            poly=[x0, y0, x1, y0, x1, y1, x0, y1],
			
 
				+                        )
			
 
				+                        footnote_anno_id = uniq_anno_id
			
 
				+                        uniq_anno_id += 1
			
 
				+                        order_id += 1
			
 
				+                        layout_dets.append(content)
			
 
				+
			
 
				+                if caption_anno_id != -1 and body_anno_id != -1:
			
 
				+                    element_relation = ElementRelation(
			
 
				+                        relation=ElementRelType.sibling,
			
 
				+                        source_anno_id=body_anno_id,
			
 
				+                        target_anno_id=caption_anno_id,
			
 
				+                    )
			
 
				+                    extra_element_relation.append(element_relation)
			
 
				+
			
 
				+                if footnote_anno_id != -1 and body_anno_id != -1:
			
 
				+                    element_relation = ElementRelation(
			
 
				+                        relation=ElementRelType.sibling,
			
 
				+                        source_anno_id=body_anno_id,
			
 
				+                        target_anno_id=footnote_anno_id,
			
 
				+                    )
			
 
				+                    extra_element_relation.append(element_relation)
			
 
				+
			
 
				+        res.append(
			
 
				+            LayoutElements(
			
 
				+                page_info=page_info,
			
 
				+                layout_dets=layout_dets,
			
 
				+                extra=LayoutElementsExtra(
			
 
				+                    element_relation=extra_element_relation),
			
 
				+            ))
			
 
				+
			
 
				+    return res
			
 
				+
			
 
				+
			
 
				+def inference(path, output_dir, method):
			
 
				+    model_config.__use_inside_model__ = True
			
 
				+    model_config.__model_mode__ = 'full'
			
 
				+    if output_dir == '':
			
 
				+        if os.path.isdir(path):
			
 
				+            output_dir = os.path.join(path, 'output')
			
 
				+        else:
			
 
				+            output_dir = os.path.join(os.path.dirname(path), 'output')
			
 
				+
			
 
				+    local_image_dir, local_md_dir = prepare_env(output_dir,
			
 
				+                                                str(Path(path).stem), method)
			
 
				+
			
 
				+    def read_fn(path):
			
 
				+        disk_rw = DiskReaderWriter(os.path.dirname(path))
			
 
				+        return disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN)
			
 
				+
			
 
				+    def parse_doc(doc_path: str):
			
 
				+        try:
			
 
				+            file_name = str(Path(doc_path).stem)
			
 
				+            pdf_data = read_fn(doc_path)
			
 
				+            do_parse(
			
 
				+                output_dir,
			
 
				+                file_name,
			
 
				+                pdf_data,
			
 
				+                [],
			
 
				+                method,
			
 
				+                False,
			
 
				+                f_draw_span_bbox=False,
			
 
				+                f_draw_layout_bbox=False,
			
 
				+                f_dump_md=False,
			
 
				+                f_dump_middle_json=True,
			
 
				+                f_dump_model_json=False,
			
 
				+                f_dump_orig_pdf=False,
			
 
				+                f_dump_content_list=False,
			
 
				+                f_draw_model_bbox=False,
			
 
				+            )
			
 
				+
			
 
				+            middle_json_fn = os.path.join(local_md_dir,
			
 
				+                                          f'{file_name}_middle.json')
			
 
				+            with open(middle_json_fn) as fd:
			
 
				+                jso = json.load(fd)
			
 
				+            os.remove(middle_json_fn)
			
 
				+            return convert_middle_json_to_layout_elements(jso, local_image_dir)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            logger.exception(e)
			
 
				+
			
 
				+    return parse_doc(path)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    import pprint
			
 
				+
			
 
				+    base_dir = '/opt/data/pdf/resources/samples/'
			
 
				+    if 0:
			
 
				+        with open(base_dir + 'json_outputs/middle.json') as f:
			
 
				+            d = json.load(f)
			
 
				+        result = convert_middle_json_to_layout_elements(d, '/tmp')
			
 
				+        pprint.pp(result)
			
 
				+    if 0:
			
 
				+        with open(base_dir + 'json_outputs/middle.3.json') as f:
			
 
				+            d = json.load(f)
			
 
				+        result = convert_middle_json_to_layout_elements(d, '/tmp')
			
 
				+        pprint.pp(result)
			
 
				+
			
 
				+    if 1:
			
 
				+        res = inference(
			
 
				+            base_dir + 'samples/pdf/one_page_with_table_image.pdf',
			
 
				+            '/tmp/output',
			
 
				+            'ocr',
			
 
				+        )
			
 
				+        pprint.pp(res)
			
--- a/magic_pdf/tools/cli_dev.py
+++ b/magic_pdf/tools/cli_dev.py
@@ -86,6 +86,7 @@ def jsonl(jsonl, method, output_dir):
 
				         pdf_data,
			
 
				         jso['doc_layout_result'],
			
 
				         method,
			
 
				+        False,
			
 
				         f_dump_content_list=True,
			
 
				         f_draw_model_bbox=True,
			
 
				     )
			
@@ -141,6 +142,7 @@ def pdf(pdf, json_data, output_dir, method):
 
				         pdf_data,
			
 
				         model_json_list,
			
 
				         method,
			
 
				+        False,
			
 
				         f_dump_content_list=True,
			
 
				         f_draw_model_bbox=True,
			
 
				     )
			
--- a/projects/llama_index_rag/README.md
+++ b/projects/llama_index_rag/README.md
@@ -0,0 +1,157 @@
 
				+## 安装
			
 
				+
			
 
				+MinerU
			
 
				+
			
 
				+```bash
			
 
				+git clone https://github.com/opendatalab/MinerU.git
			
 
				+cd MinerU
			
 
				+
			
 
				+conda create -n MinerU python=3.10
			
 
				+conda activate MinerU
			
 
				+pip install .[full] --extra-index-url https://wheels.myhloli.com
			
 
				+```
			
 
				+
			
 
				+第三方软件
			
 
				+
			
 
				+```bash
			
 
				+# install
			
 
				+pip install llama-index-vector-stores-elasticsearch==0.2.0
			
 
				+pip install llama-index-embeddings-dashscope==0.2.0
			
 
				+pip install llama-index-core==0.10.68
			
 
				+pip install einops==0.7.0
			
 
				+pip install transformers-stream-generator==0.0.5
			
 
				+pip install accelerate==0.33.0
			
 
				+
			
 
				+# uninstall
			
 
				+pip uninstall transformer-engine
			
 
				+```
			
 
				+
			
 
				+## 环境配置
			
 
				+
			
 
				+```
			
 
				+export DASHSCOPE_API_KEY={some_key}
			
 
				+export ES_USER={some_es_user}
			
 
				+export ES_PASSWORD={some_es_password}
			
 
				+export ES_URL=http://{es_url}:9200
			
 
				+```
			
 
				+
			
 
				+DASHSCOPE_API_KEY 的开通参考[文档](https://help.aliyun.com/zh/dashscope/opening-service)
			
 
				+
			
 
				+## 使用
			
 
				+
			
 
				+### 导入数据
			
 
				+
			
 
				+```bash
			
 
				+python data_ingestion.py -p some.pdf  # load data from pdf
			
 
				+
			
 
				+    or
			
 
				+
			
 
				+python data_ingestion.py -p /opt/data/some_pdf_directory/ # load data from multiple pdfs under the directory {some_pdf_directory}
			
 
				+```
			
 
				+
			
 
				+### 查询
			
 
				+
			
 
				+```bash
			
 
				+python query.py --question '{the_question_you_want_to_ask}'
			
 
				+```
			
 
				+
			
 
				+## 示例
			
 
				+
			
 
				+````bash
			
 
				+# 启动 es 服务
			
 
				+docker compose up -d
			
 
				+
			
 
				+or
			
 
				+
			
 
				+docker-compose up -d
			
 
				+
			
 
				+
			
 
				+# 配置环境变量
			
 
				+export ES_USER=elastic
			
 
				+export ES_PASSWORD=llama_index
			
 
				+export ES_URL=http://127.0.0.1:9200
			
 
				+
			
 
				+
			
 
				+# 导入数据
			
 
				+python data_ingestion.py -p example/data/declaration_of_the_rights_of_man_1789.pdf
			
 
				+
			
 
				+
			
 
				+# 查询问题
			
 
				+python query.py -q 'how about the rights of men'
			
 
				+
			
 
				+## outputs
			
 
				+请基于```内的内容回答问题。"
			
 
				+            ```
			
 
				+            I. Men are born, and always continue, free and equal in respect of their rights. Civil distinctions, therefore, can be founded only on public utility.
			
 
				+            ```
			
 
				+            我的问题是：how about the rights of men。
			
 
				+
			
 
				+question: how about the rights of men
			
 
				+answer: The statement implies that men are born free and equal in terms of their rights. Civil distinctions should only be based on public utility. However, it does not specify what those rights are. It is up to society and individual countries to determine and protect the specific rights of their citizens.
			
 
				+
			
 
				+````
			
 
				+
			
 
				+## 开发
			
 
				+
			
 
				+`MinerU` 提供了 `RAG` 集成接口，用户可以指定单个 `pdf` 文件或者某个目录作为输入。`MinerU` 会自动解析输入文件，并返回可迭代的接口用于获取数据。
			
 
				+
			
 
				+### API 接口
			
 
				+
			
 
				+```python
			
 
				+from magic_pdf.integrations.rag.type import Node
			
 
				+
			
 
				+class RagPageReader:
			
 
				+    def get_rel_map(self) -> list[ElementRelation]:
			
 
				+        # 获取节点之间的关系
			
 
				+        pass
			
 
				+    ...
			
 
				+
			
 
				+class RagDocumentReader:
			
 
				+    ...
			
 
				+
			
 
				+class DataReader:
			
 
				+    def __init__(self, path_or_directory: str, method: str, output_dir: str):
			
 
				+        pass
			
 
				+
			
 
				+    def get_documents_count(self) -> int:
			
 
				+        """获取 pdf 文档数量"""
			
 
				+        pass
			
 
				+
			
 
				+    def get_document_result(self, idx: int) -> RagDocumentReader | None:
			
 
				+        """获取某个 pdf 的解析内容"""
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+    def get_document_filename(self, idx: int) -> Path:
			
 
				+        """获取某个 pdf 的具体的路径"""
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+```
			
 
				+
			
 
				+类型定义
			
 
				+
			
 
				+```python
			
 
				+
			
 
				+class Node(BaseModel):
			
 
				+    category_type: CategoryType = Field(description='类别') # 类别
			
 
				+    text: str | None = Field(description='文本内容',
			
 
				+                             default=None)
			
 
				+    image_path: str | None = Field(description='图或者表格（表可能用图片形式存储）的存储路径',
			
 
				+                                   default=None)
			
 
				+    anno_id: int = Field(description='unique id', default=-1)
			
 
				+    latex: str | None = Field(description='公式或表格 latex 解析结果', default=None)
			
 
				+    html: str | None = Field(description='表格的 html 解析结果', default=None)
			
 
				+
			
 
				+```
			
 
				+
			
 
				+表格存储形式可能会是 图片、latex、html 三种形式之一。
			
 
				+anno_id 是该 Node 在同一份文档内唯一的 ID，后续可以用于匹配该 Node 和其他 Node 的关系。节点之间的关系可以通过方法 `get_rel_map` 获取。用户可以用 `anno_id` 匹配节点之间的关系，并用于构建带有节点关系的 rag index。
			
 
				+
			
 
				+### 节点类型关系矩阵
			
 
				+
			
 
				+|                | image_body | table_body |
			
 
				+| -------------- | ---------- | ---------- |
			
 
				+| image_caption  | sibling    |            |
			
 
				+| table_caption  |            | sibling    |
			
 
				+| table_footnote |            | sibling    |
			
--- a/projects/llama_index_rag/data_ingestion.py
+++ b/projects/llama_index_rag/data_ingestion.py
@@ -0,0 +1,68 @@
 
				+import os
			
 
				+
			
 
				+import click
			
 
				+from llama_index.core.schema import TextNode
			
 
				+from llama_index.embeddings.dashscope import (DashScopeEmbedding,
			
 
				+                                              DashScopeTextEmbeddingModels,
			
 
				+                                              DashScopeTextEmbeddingType)
			
 
				+from llama_index.vector_stores.elasticsearch import ElasticsearchStore
			
 
				+
			
 
				+from magic_pdf.integrations.rag.api import DataReader
			
 
				+
			
 
				+es_vec_store = ElasticsearchStore(
			
 
				+    index_name='rag_index',
			
 
				+    es_url=os.getenv('ES_URL', 'http://127.0.0.1:9200'),
			
 
				+    es_user=os.getenv('ES_USER', 'elastic'),
			
 
				+    es_password=os.getenv('ES_PASSWORD', 'llama_index'),
			
 
				+)
			
 
				+
			
 
				+
			
 
				+# Create embeddings
			
 
				+# text_type=`document` to build index
			
 
				+def embed_node(node):
			
 
				+    embedder = DashScopeEmbedding(
			
 
				+        model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
			
 
				+        text_type=DashScopeTextEmbeddingType.TEXT_TYPE_DOCUMENT,
			
 
				+    )
			
 
				+
			
 
				+    result_embeddings = embedder.get_text_embedding(node.text)
			
 
				+    node.embedding = result_embeddings
			
 
				+    return node
			
 
				+
			
 
				+
			
 
				+@click.command()
			
 
				+@click.option(
			
 
				+    '-p',
			
 
				+    '--path',
			
 
				+    'path',
			
 
				+    type=click.Path(exists=True),
			
 
				+    required=True,
			
 
				+    help='local pdf filepath or directory',
			
 
				+)
			
 
				+def cli(path):
			
 
				+    output_dir = '/tmp/magic_pdf/integrations/rag/'
			
 
				+    os.makedirs(output_dir, exist_ok=True)
			
 
				+    documents = DataReader(path, 'ocr', output_dir)
			
 
				+
			
 
				+    # build nodes
			
 
				+    nodes = []
			
 
				+
			
 
				+    for idx in range(documents.get_documents_count()):
			
 
				+        doc = documents.get_document_result(idx)
			
 
				+        if doc is None:  # something wrong happens when parse pdf !
			
 
				+            continue
			
 
				+
			
 
				+        for page in iter(
			
 
				+                doc):  # iterate documents from initial page to last page !
			
 
				+            for element in iter(page):  # iterate the element from all page !
			
 
				+                if element.text is None:
			
 
				+                    continue
			
 
				+                nodes.append(
			
 
				+                    embed_node(
			
 
				+                        TextNode(text=element.text,
			
 
				+                                 metadata={'purpose': 'demo'})))
			
 
				+    es_vec_store.add(nodes)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    cli()
			
--- a/projects/llama_index_rag/docker-compose.yml
+++ b/projects/llama_index_rag/docker-compose.yml
@@ -0,0 +1,24 @@
 
				+services:
			
 
				+  es:
			
 
				+    container_name: es
			
 
				+    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3
			
 
				+    volumes:
			
 
				+      - esdata01:/usr/share/elasticsearch/data
			
 
				+    ports:
			
 
				+      - 9200:9200
			
 
				+    environment:
			
 
				+      - node.name=es
			
 
				+      - ELASTIC_PASSWORD=llama_index
			
 
				+      - bootstrap.memory_lock=false
			
 
				+      - discovery.type=single-node
			
 
				+      - xpack.security.enabled=true
			
 
				+      - xpack.security.http.ssl.enabled=false
			
 
				+      - xpack.security.transport.ssl.enabled=false
			
 
				+    ulimits:
			
 
				+      memlock:
			
 
				+        soft: -1
			
 
				+        hard: -1
			
 
				+    restart: always
			
 
				+volumes:
			
 
				+  esdata01:
			
 
				+    driver: local
			
--- a/projects/llama_index_rag/example/data/declaration_of_the_rights_of_man_1789.pdf
+++ b/projects/llama_index_rag/example/data/declaration_of_the_rights_of_man_1789.pdf
--- a/projects/llama_index_rag/query.py
+++ b/projects/llama_index_rag/query.py
@@ -0,0 +1,84 @@
 
				+import os
			
 
				+
			
 
				+import click
			
 
				+from llama_index.core.vector_stores.types import VectorStoreQuery
			
 
				+from llama_index.embeddings.dashscope import (DashScopeEmbedding,
			
 
				+                                              DashScopeTextEmbeddingModels,
			
 
				+                                              DashScopeTextEmbeddingType)
			
 
				+from llama_index.vector_stores.elasticsearch import (AsyncDenseVectorStrategy,
			
 
				+                                                     ElasticsearchStore)
			
 
				+# initialize qwen 7B model
			
 
				+from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
			
 
				+
			
 
				+es_vector_store = ElasticsearchStore(
			
 
				+    index_name='rag_index',
			
 
				+    es_url=os.getenv('ES_URL', 'http://127.0.0.1:9200'),
			
 
				+    es_user=os.getenv('ES_USER', 'elastic'),
			
 
				+    es_password=os.getenv('ES_PASSWORD', 'llama_index'),
			
 
				+    retrieval_strategy=AsyncDenseVectorStrategy(),
			
 
				+)
			
 
				+
			
 
				+
			
 
				+def embed_text(text):
			
 
				+    embedder = DashScopeEmbedding(
			
 
				+        model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
			
 
				+        text_type=DashScopeTextEmbeddingType.TEXT_TYPE_DOCUMENT,
			
 
				+    )
			
 
				+    return embedder.get_text_embedding(text)
			
 
				+
			
 
				+
			
 
				+def search(vector_store: ElasticsearchStore, query: str):
			
 
				+    query_vec = VectorStoreQuery(query_embedding=embed_text(query))
			
 
				+    result = vector_store.query(query_vec)
			
 
				+    return '\n'.join([node.text for node in result.nodes])
			
 
				+
			
 
				+
			
 
				+@click.command()
			
 
				+@click.option(
			
 
				+    '-q',
			
 
				+    '--question',
			
 
				+    'question',
			
 
				+    required=True,
			
 
				+    help='ask what you want to know!',
			
 
				+)
			
 
				+def cli(question):
			
 
				+    tokenizer = AutoTokenizer.from_pretrained('qwen/Qwen-7B-Chat',
			
 
				+                                              revision='v1.0.5',
			
 
				+                                              trust_remote_code=True)
			
 
				+    model = AutoModelForCausalLM.from_pretrained('qwen/Qwen-7B-Chat',
			
 
				+                                                 revision='v1.0.5',
			
 
				+                                                 device_map='auto',
			
 
				+                                                 trust_remote_code=True,
			
 
				+                                                 fp32=True).eval()
			
 
				+    model.generation_config = GenerationConfig.from_pretrained(
			
 
				+        'Qwen/Qwen-7B-Chat', revision='v1.0.5', trust_remote_code=True)
			
 
				+
			
 
				+    # define a prompt template for the vectorDB-enhanced LLM generation
			
 
				+    def answer_question(question, context, model):
			
 
				+        if context == '':
			
 
				+            prompt = question
			
 
				+        else:
			
 
				+            prompt = f'''请基于```内的内容回答问题。"
			
 
				+            ```
			
 
				+            {context}
			
 
				+            ```
			
 
				+            我的问题是：{question}。
			
 
				+            '''
			
 
				+        history = None
			
 
				+        print(prompt)
			
 
				+        response, history = model.chat(tokenizer, prompt, history=None)
			
 
				+        return response
			
 
				+
			
 
				+    answer = answer_question(question, search(es_vector_store, question),
			
 
				+                             model)
			
 
				+    print(f'question: {question}\n'
			
 
				+          f'answer: {answer}')
			
 
				+
			
 
				+
			
 
				+"""
			
 
				+
			
 
				+python query.py -q 'how about the rights of men'
			
 
				+"""
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    cli()
			
--- a/requirements-docker.txt
+++ b/requirements-docker.txt
@@ -15,4 +15,4 @@ paddleocr==2.7.3
 
				 paddlepaddle==3.0.0b1
			
 
				 pypandoc
			
 
				 struct-eqtable==0.1.0
			
 
				-detectron2
			
 
				+detectron2
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,12 @@
 
				 boto3>=1.28.43
			
 
				 Brotli>=1.1.0
			
 
				 click>=8.1.7
			
 
				-PyMuPDF>=1.24.9
			
 
				+fast-langdetect==0.2.0
			
 
				 loguru>=0.6.0
			
 
				 numpy>=1.21.6,<2.0.0
			
 
				-fast-langdetect==0.2.0
			
 
				-wordninja>=2.0.0
			
 
				-scikit-learn>=1.0.2
			
 
				 pdfminer.six==20231228
			
 
				+pydantic>=2.7.2,<2.8.0
			
 
				+PyMuPDF>=1.24.9
			
 
				+scikit-learn>=1.0.2
			
 
				+wordninja>=2.0.0
			
 
				 # The requirements.txt must ensure that only necessary external dependencies are introduced. If there are new dependencies to add, please contact the project administrator.
			
--- a/tests/test_integrations/test_rag/assets/middle.json
+++ b/tests/test_integrations/test_rag/assets/middle.json
@@ -0,0 +1,2302 @@
 
				+{
			
 
				+    "pdf_info": [
			
 
				+        {
			
 
				+            "preproc_blocks": [
			
 
				+                {
			
 
				+                    "type": "text",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        57,
			
 
				+                        299,
			
 
				+                        93
			
 
				+                    ],
			
 
				+                    "lines": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                57,
			
 
				+                                299,
			
 
				+                                68
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        57,
			
 
				+                                        298,
			
 
				+                                        68
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "of the synthetic stereo scene from a single camera perspective",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                71,
			
 
				+                                299,
			
 
				+                                80
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        71,
			
 
				+                                        299,
			
 
				+                                        80
			
 
				+                                    ],
			
 
				+                                    "score": 0.96,
			
 
				+                                    "content": "along with the ground truth disparity,occlusion map,and",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                82,
			
 
				+                                123,
			
 
				+                                93
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        82,
			
 
				+                                        123,
			
 
				+                                        93
			
 
				+                                    ],
			
 
				+                                    "score": 0.99,
			
 
				+                                    "content": "discontinuitymap.",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        100,
			
 
				+                        301,
			
 
				+                        535
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                51,
			
 
				+                                100,
			
 
				+                                292,
			
 
				+                                484
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        51,
			
 
				+                                        100,
			
 
				+                                        292,
			
 
				+                                        484
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                51,
			
 
				+                                                100,
			
 
				+                                                292,
			
 
				+                                                484
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999815225601196,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                488,
			
 
				+                                301,
			
 
				+                                535
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        490,
			
 
				+                                        299,
			
 
				+                                        499
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                49,
			
 
				+                                                490,
			
 
				+                                                299,
			
 
				+                                                499
			
 
				+                                            ],
			
 
				+                                            "score": 1.0,
			
 
				+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        501,
			
 
				+                                        300,
			
 
				+                                        512
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                501,
			
 
				+                                                300,
			
 
				+                                                512
			
 
				+                                            ],
			
 
				+                                            "score": 1.0,
			
 
				+                                            "content": "quence (1st row), along with their corresponding ground truth",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        513,
			
 
				+                                        299,
			
 
				+                                        523
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                513,
			
 
				+                                                299,
			
 
				+                                                523
			
 
				+                                            ],
			
 
				+                                            "score": 0.98,
			
 
				+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        525,
			
 
				+                                        110,
			
 
				+                                        535
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                525,
			
 
				+                                                110,
			
 
				+                                                535
			
 
				+                                            ],
			
 
				+                                            "score": 0.99,
			
 
				+                                            "content": "map (4th row).",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "text",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        549,
			
 
				+                        299,
			
 
				+                        678
			
 
				+                    ],
			
 
				+                    "lines": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                58,
			
 
				+                                549,
			
 
				+                                299,
			
 
				+                                558
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        58,
			
 
				+                                        549,
			
 
				+                                        298,
			
 
				+                                        558
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "Theresultsof temporalstereomatching aregiveninFigure",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                561,
			
 
				+                                299,
			
 
				+                                570
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        47,
			
 
				+                                        561,
			
 
				+                                        298,
			
 
				+                                        570
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "3foruniformadditivenoiseconfinedtotherangesof±O",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                573,
			
 
				+                                299,
			
 
				+                                582
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        573,
			
 
				+                                        299,
			
 
				+                                        582
			
 
				+                                    ],
			
 
				+                                    "score": 0.96,
			
 
				+                                    "content": "±20, and ±40. Each performance plot is given as a function",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                585,
			
 
				+                                299,
			
 
				+                                594
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        585,
			
 
				+                                        299,
			
 
				+                                        594
			
 
				+                                    ],
			
 
				+                                    "score": 0.95,
			
 
				+                                    "content": "of the feedback coefficient X. As with the majority of temporal",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                597,
			
 
				+                                299,
			
 
				+                                606
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        597,
			
 
				+                                        299,
			
 
				+                                        606
			
 
				+                                    ],
			
 
				+                                    "score": 0.99,
			
 
				+                                    "content": "stereomatching methods,improvements are negligible when",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                609,
			
 
				+                                299,
			
 
				+                                618
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        609,
			
 
				+                                        299,
			
 
				+                                        618
			
 
				+                                    ],
			
 
				+                                    "score": 0.97,
			
 
				+                                    "content": "no noise is added to the images [1o], [19]. This is largely due",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                621,
			
 
				+                                299,
			
 
				+                                629
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        621,
			
 
				+                                        299,
			
 
				+                                        629
			
 
				+                                    ],
			
 
				+                                    "score": 1.0,
			
 
				+                                    "content": "tothefactthatthevideousedtoevaluatethesemethodsis",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                633,
			
 
				+                                299,
			
 
				+                                641
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        633,
			
 
				+                                        299,
			
 
				+                                        641
			
 
				+                                    ],
			
 
				+                                    "score": 1.0,
			
 
				+                                    "content": "computergeneratedwithverylittlenoisetostartwith,thus",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                644,
			
 
				+                                299,
			
 
				+                                654
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        644,
			
 
				+                                        299,
			
 
				+                                        654
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "the noise suppression achieved with temporal stereo matching",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                657,
			
 
				+                                299,
			
 
				+                                666
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        657,
			
 
				+                                        299,
			
 
				+                                        666
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "showslittletonoimprovementovermethodsthatoperate on",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                669,
			
 
				+                                113,
			
 
				+                                678
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        669,
			
 
				+                                        113,
			
 
				+                                        678
			
 
				+                                    ],
			
 
				+                                    "score": 1.0,
			
 
				+                                    "content": "pairsofimages.",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "text",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        680,
			
 
				+                        299,
			
 
				+                        725
			
 
				+                    ],
			
 
				+                    "lines": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                58,
			
 
				+                                680,
			
 
				+                                299,
			
 
				+                                690
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        59,
			
 
				+                                        680,
			
 
				+                                        298,
			
 
				+                                        690
			
 
				+                                    ],
			
 
				+                                    "score": 0.97,
			
 
				+                                    "content": "Significantimprovementsin accuracy canbeseenin Figure",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                692,
			
 
				+                                299,
			
 
				+                                701
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        692,
			
 
				+                                        298,
			
 
				+                                        701
			
 
				+                                    ],
			
 
				+                                    "score": 0.97,
			
 
				+                                    "content": "3 when the noise has ranges of ±20, and ±40.In this scenario",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                703,
			
 
				+                                299,
			
 
				+                                714
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        703,
			
 
				+                                        299,
			
 
				+                                        714
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "the effect of noise in the current frame is reduced by increasing",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                716,
			
 
				+                                299,
			
 
				+                                725
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        716,
			
 
				+                                        299,
			
 
				+                                        725
			
 
				+                                    ],
			
 
				+                                    "score": 0.96,
			
 
				+                                    "content": "thefeedbackcoefficientX.Thisincreasing ofXhas theeffect",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        310,
			
 
				+                        55,
			
 
				+                        564,
			
 
				+                        371
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                314,
			
 
				+                                55,
			
 
				+                                538,
			
 
				+                                305
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        314,
			
 
				+                                        55,
			
 
				+                                        538,
			
 
				+                                        305
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                314,
			
 
				+                                                55,
			
 
				+                                                538,
			
 
				+                                                305
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999905824661255,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                310,
			
 
				+                                311,
			
 
				+                                564,
			
 
				+                                371
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        313,
			
 
				+                                        562,
			
 
				+                                        322
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                313,
			
 
				+                                                562,
			
 
				+                                                322
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "Figure 3: Performance of temporal matching at different levels",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        325,
			
 
				+                                        561,
			
 
				+                                        334
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                325,
			
 
				+                                                561,
			
 
				+                                                334
			
 
				+                                            ],
			
 
				+                                            "score": 0.98,
			
 
				+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        336,
			
 
				+                                        563,
			
 
				+                                        347
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                336,
			
 
				+                                                563,
			
 
				+                                                347
			
 
				+                                            ],
			
 
				+                                            "score": 0.99,
			
 
				+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        348,
			
 
				+                                        561,
			
 
				+                                        358
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                348,
			
 
				+                                                561,
			
 
				+                                                358
			
 
				+                                            ],
			
 
				+                                            "score": 0.96,
			
 
				+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        360,
			
 
				+                                        535,
			
 
				+                                        371
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                360,
			
 
				+                                                535,
			
 
				+                                                371
			
 
				+                                            ],
			
 
				+                                            "score": 0.96,
			
 
				+                                            "content": "values of MSE obtained without temporal aggregation.",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        310,
			
 
				+                        418,
			
 
				+                        563,
			
 
				+                        666
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                314,
			
 
				+                                418,
			
 
				+                                549,
			
 
				+                                623
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        314,
			
 
				+                                        418,
			
 
				+                                        549,
			
 
				+                                        623
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                314,
			
 
				+                                                418,
			
 
				+                                                549,
			
 
				+                                                623
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999067783355713,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                310,
			
 
				+                                630,
			
 
				+                                563,
			
 
				+                                666
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        631,
			
 
				+                                        562,
			
 
				+                                        641
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                631,
			
 
				+                                                562,
			
 
				+                                                641
			
 
				+                                            ],
			
 
				+                                            "score": 0.94,
			
 
				+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        644,
			
 
				+                                        561,
			
 
				+                                        652
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                644,
			
 
				+                                                561,
			
 
				+                                                652
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "responding to the smallest mean squared error (MSE)of the",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        655,
			
 
				+                                        513,
			
 
				+                                        665
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                655,
			
 
				+                                                513,
			
 
				+                                                665
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "disparity estimates for a range of noise strengths.",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "text",
			
 
				+                    "bbox": [
			
 
				+                        311,
			
 
				+                        692,
			
 
				+                        563,
			
 
				+                        725
			
 
				+                    ],
			
 
				+                    "lines": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                311,
			
 
				+                                692,
			
 
				+                                563,
			
 
				+                                702
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        692,
			
 
				+                                        562,
			
 
				+                                        702
			
 
				+                                    ],
			
 
				+                                    "score": 0.95,
			
 
				+                                    "content": "of averaging out noise in the per-pixel costs by selecting",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                311,
			
 
				+                                704,
			
 
				+                                563,
			
 
				+                                713
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        704,
			
 
				+                                        562,
			
 
				+                                        713
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "matches based more heavily upon the auxiliary cost, which",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                311,
			
 
				+                                716,
			
 
				+                                563,
			
 
				+                                725
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        716,
			
 
				+                                        563,
			
 
				+                                        725
			
 
				+                                    ],
			
 
				+                                    "score": 0.97,
			
 
				+                                    "content": "is essentially a much more stable running average of the cost",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                }
			
 
				+            ],
			
 
				+            "layout_bboxes": [
			
 
				+                {
			
 
				+                    "layout_bbox": [
			
 
				+                        47,
			
 
				+                        55,
			
 
				+                        301,
			
 
				+                        726
			
 
				+                    ],
			
 
				+                    "layout_label": "V",
			
 
				+                    "sub_layout": []
			
 
				+                },
			
 
				+                {
			
 
				+                    "layout_bbox": [
			
 
				+                        310,
			
 
				+                        55,
			
 
				+                        564,
			
 
				+                        726
			
 
				+                    ],
			
 
				+                    "layout_label": "V",
			
 
				+                    "sub_layout": []
			
 
				+                }
			
 
				+            ],
			
 
				+            "page_idx": 0,
			
 
				+            "page_size": [
			
 
				+                612.0,
			
 
				+                792.0
			
 
				+            ],
			
 
				+            "_layout_tree": [
			
 
				+                {
			
 
				+                    "layout_bbox": [
			
 
				+                        0,
			
 
				+                        55,
			
 
				+                        612.0,
			
 
				+                        726
			
 
				+                    ],
			
 
				+                    "layout_label": "V",
			
 
				+                    "sub_layout": [
			
 
				+                        {
			
 
				+                            "layout_bbox": [
			
 
				+                                47,
			
 
				+                                55,
			
 
				+                                564,
			
 
				+                                726
			
 
				+                            ],
			
 
				+                            "layout_label": "H",
			
 
				+                            "sub_layout": [
			
 
				+                                {
			
 
				+                                    "layout_bbox": [
			
 
				+                                        47,
			
 
				+                                        55,
			
 
				+                                        301,
			
 
				+                                        726
			
 
				+                                    ],
			
 
				+                                    "layout_label": "V",
			
 
				+                                    "sub_layout": []
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "layout_bbox": [
			
 
				+                                        310,
			
 
				+                                        55,
			
 
				+                                        564,
			
 
				+                                        726
			
 
				+                                    ],
			
 
				+                                    "layout_label": "V",
			
 
				+                                    "sub_layout": []
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                }
			
 
				+            ],
			
 
				+            "images": [
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        100,
			
 
				+                        301,
			
 
				+                        535
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                51,
			
 
				+                                100,
			
 
				+                                292,
			
 
				+                                484
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        51,
			
 
				+                                        100,
			
 
				+                                        292,
			
 
				+                                        484
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                51,
			
 
				+                                                100,
			
 
				+                                                292,
			
 
				+                                                484
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999815225601196,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                488,
			
 
				+                                301,
			
 
				+                                535
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        490,
			
 
				+                                        299,
			
 
				+                                        499
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                49,
			
 
				+                                                490,
			
 
				+                                                299,
			
 
				+                                                499
			
 
				+                                            ],
			
 
				+                                            "score": 1.0,
			
 
				+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        501,
			
 
				+                                        300,
			
 
				+                                        512
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                501,
			
 
				+                                                300,
			
 
				+                                                512
			
 
				+                                            ],
			
 
				+                                            "score": 1.0,
			
 
				+                                            "content": "quence (1st row), along with their corresponding ground truth",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        513,
			
 
				+                                        299,
			
 
				+                                        523
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                513,
			
 
				+                                                299,
			
 
				+                                                523
			
 
				+                                            ],
			
 
				+                                            "score": 0.98,
			
 
				+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        525,
			
 
				+                                        110,
			
 
				+                                        535
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                525,
			
 
				+                                                110,
			
 
				+                                                535
			
 
				+                                            ],
			
 
				+                                            "score": 0.99,
			
 
				+                                            "content": "map (4th row).",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        310,
			
 
				+                        55,
			
 
				+                        564,
			
 
				+                        371
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                314,
			
 
				+                                55,
			
 
				+                                538,
			
 
				+                                305
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        314,
			
 
				+                                        55,
			
 
				+                                        538,
			
 
				+                                        305
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                314,
			
 
				+                                                55,
			
 
				+                                                538,
			
 
				+                                                305
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999905824661255,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                310,
			
 
				+                                311,
			
 
				+                                564,
			
 
				+                                371
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        313,
			
 
				+                                        562,
			
 
				+                                        322
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                313,
			
 
				+                                                562,
			
 
				+                                                322
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "Figure 3: Performance of temporal matching at different levels",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        325,
			
 
				+                                        561,
			
 
				+                                        334
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                325,
			
 
				+                                                561,
			
 
				+                                                334
			
 
				+                                            ],
			
 
				+                                            "score": 0.98,
			
 
				+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        336,
			
 
				+                                        563,
			
 
				+                                        347
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                336,
			
 
				+                                                563,
			
 
				+                                                347
			
 
				+                                            ],
			
 
				+                                            "score": 0.99,
			
 
				+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        348,
			
 
				+                                        561,
			
 
				+                                        358
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                348,
			
 
				+                                                561,
			
 
				+                                                358
			
 
				+                                            ],
			
 
				+                                            "score": 0.96,
			
 
				+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        360,
			
 
				+                                        535,
			
 
				+                                        371
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                360,
			
 
				+                                                535,
			
 
				+                                                371
			
 
				+                                            ],
			
 
				+                                            "score": 0.96,
			
 
				+                                            "content": "values of MSE obtained without temporal aggregation.",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        310,
			
 
				+                        418,
			
 
				+                        563,
			
 
				+                        666
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                314,
			
 
				+                                418,
			
 
				+                                549,
			
 
				+                                623
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        314,
			
 
				+                                        418,
			
 
				+                                        549,
			
 
				+                                        623
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                314,
			
 
				+                                                418,
			
 
				+                                                549,
			
 
				+                                                623
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999067783355713,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                310,
			
 
				+                                630,
			
 
				+                                563,
			
 
				+                                666
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        631,
			
 
				+                                        562,
			
 
				+                                        641
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                631,
			
 
				+                                                562,
			
 
				+                                                641
			
 
				+                                            ],
			
 
				+                                            "score": 0.94,
			
 
				+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        644,
			
 
				+                                        561,
			
 
				+                                        652
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                644,
			
 
				+                                                561,
			
 
				+                                                652
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "responding to the smallest mean squared error (MSE)of the",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        655,
			
 
				+                                        513,
			
 
				+                                        665
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                655,
			
 
				+                                                513,
			
 
				+                                                665
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "disparity estimates for a range of noise strengths.",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                }
			
 
				+            ],
			
 
				+            "tables": [],
			
 
				+            "interline_equations": [],
			
 
				+            "discarded_blocks": [],
			
 
				+            "need_drop": false,
			
 
				+            "drop_reason": [],
			
 
				+            "para_blocks": [
			
 
				+                {
			
 
				+                    "type": "text",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        57,
			
 
				+                        299,
			
 
				+                        93
			
 
				+                    ],
			
 
				+                    "lines": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                57,
			
 
				+                                299,
			
 
				+                                68
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        57,
			
 
				+                                        298,
			
 
				+                                        68
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "of the synthetic stereo scene from a single camera perspective",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                71,
			
 
				+                                299,
			
 
				+                                80
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        71,
			
 
				+                                        299,
			
 
				+                                        80
			
 
				+                                    ],
			
 
				+                                    "score": 0.96,
			
 
				+                                    "content": "along with the ground truth disparity,occlusion map,and",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                82,
			
 
				+                                123,
			
 
				+                                93
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        82,
			
 
				+                                        123,
			
 
				+                                        93
			
 
				+                                    ],
			
 
				+                                    "score": 0.99,
			
 
				+                                    "content": "discontinuitymap.",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        100,
			
 
				+                        301,
			
 
				+                        535
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                51,
			
 
				+                                100,
			
 
				+                                292,
			
 
				+                                484
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        51,
			
 
				+                                        100,
			
 
				+                                        292,
			
 
				+                                        484
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                51,
			
 
				+                                                100,
			
 
				+                                                292,
			
 
				+                                                484
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999815225601196,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                488,
			
 
				+                                301,
			
 
				+                                535
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        490,
			
 
				+                                        299,
			
 
				+                                        499
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                49,
			
 
				+                                                490,
			
 
				+                                                299,
			
 
				+                                                499
			
 
				+                                            ],
			
 
				+                                            "score": 1.0,
			
 
				+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        501,
			
 
				+                                        300,
			
 
				+                                        512
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                501,
			
 
				+                                                300,
			
 
				+                                                512
			
 
				+                                            ],
			
 
				+                                            "score": 1.0,
			
 
				+                                            "content": "quence (1st row), along with their corresponding ground truth",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        513,
			
 
				+                                        299,
			
 
				+                                        523
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                513,
			
 
				+                                                299,
			
 
				+                                                523
			
 
				+                                            ],
			
 
				+                                            "score": 0.98,
			
 
				+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        525,
			
 
				+                                        110,
			
 
				+                                        535
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                48,
			
 
				+                                                525,
			
 
				+                                                110,
			
 
				+                                                535
			
 
				+                                            ],
			
 
				+                                            "score": 0.99,
			
 
				+                                            "content": "map (4th row).",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "text",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        549,
			
 
				+                        299,
			
 
				+                        678
			
 
				+                    ],
			
 
				+                    "lines": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                58,
			
 
				+                                549,
			
 
				+                                299,
			
 
				+                                558
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        58,
			
 
				+                                        549,
			
 
				+                                        298,
			
 
				+                                        558
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "Theresultsof temporalstereomatching aregiveninFigure",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                561,
			
 
				+                                299,
			
 
				+                                570
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        47,
			
 
				+                                        561,
			
 
				+                                        298,
			
 
				+                                        570
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "3foruniformadditivenoiseconfinedtotherangesof±O",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                573,
			
 
				+                                299,
			
 
				+                                582
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        573,
			
 
				+                                        299,
			
 
				+                                        582
			
 
				+                                    ],
			
 
				+                                    "score": 0.96,
			
 
				+                                    "content": "±20, and ±40. Each performance plot is given as a function",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                585,
			
 
				+                                299,
			
 
				+                                594
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        585,
			
 
				+                                        299,
			
 
				+                                        594
			
 
				+                                    ],
			
 
				+                                    "score": 0.95,
			
 
				+                                    "content": "of the feedback coefficient X. As with the majority of temporal",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                597,
			
 
				+                                299,
			
 
				+                                606
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        49,
			
 
				+                                        597,
			
 
				+                                        299,
			
 
				+                                        606
			
 
				+                                    ],
			
 
				+                                    "score": 0.99,
			
 
				+                                    "content": "stereomatching methods,improvements are negligible when",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                609,
			
 
				+                                299,
			
 
				+                                618
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        609,
			
 
				+                                        299,
			
 
				+                                        618
			
 
				+                                    ],
			
 
				+                                    "score": 0.97,
			
 
				+                                    "content": "no noise is added to the images [1o], [19]. This is largely due",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                621,
			
 
				+                                299,
			
 
				+                                629
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        621,
			
 
				+                                        299,
			
 
				+                                        629
			
 
				+                                    ],
			
 
				+                                    "score": 1.0,
			
 
				+                                    "content": "tothefactthatthevideousedtoevaluatethesemethodsis",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                633,
			
 
				+                                299,
			
 
				+                                641
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        633,
			
 
				+                                        299,
			
 
				+                                        641
			
 
				+                                    ],
			
 
				+                                    "score": 1.0,
			
 
				+                                    "content": "computergeneratedwithverylittlenoisetostartwith,thus",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                644,
			
 
				+                                299,
			
 
				+                                654
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        644,
			
 
				+                                        299,
			
 
				+                                        654
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "the noise suppression achieved with temporal stereo matching",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                657,
			
 
				+                                299,
			
 
				+                                666
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        657,
			
 
				+                                        299,
			
 
				+                                        666
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "showslittletonoimprovementovermethodsthatoperate on",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                669,
			
 
				+                                113,
			
 
				+                                678
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        669,
			
 
				+                                        113,
			
 
				+                                        678
			
 
				+                                    ],
			
 
				+                                    "score": 1.0,
			
 
				+                                    "content": "pairsofimages.",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "text",
			
 
				+                    "bbox": [
			
 
				+                        47,
			
 
				+                        680,
			
 
				+                        299,
			
 
				+                        725
			
 
				+                    ],
			
 
				+                    "lines": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                58,
			
 
				+                                680,
			
 
				+                                299,
			
 
				+                                690
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        59,
			
 
				+                                        680,
			
 
				+                                        298,
			
 
				+                                        690
			
 
				+                                    ],
			
 
				+                                    "score": 0.97,
			
 
				+                                    "content": "Significantimprovementsin accuracy canbeseenin Figure",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                692,
			
 
				+                                299,
			
 
				+                                701
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        692,
			
 
				+                                        298,
			
 
				+                                        701
			
 
				+                                    ],
			
 
				+                                    "score": 0.97,
			
 
				+                                    "content": "3 when the noise has ranges of ±20, and ±40.In this scenario",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                703,
			
 
				+                                299,
			
 
				+                                714
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        703,
			
 
				+                                        299,
			
 
				+                                        714
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "the effect of noise in the current frame is reduced by increasing",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                47,
			
 
				+                                716,
			
 
				+                                299,
			
 
				+                                725
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        48,
			
 
				+                                        716,
			
 
				+                                        299,
			
 
				+                                        725
			
 
				+                                    ],
			
 
				+                                    "score": 0.96,
			
 
				+                                    "content": "thefeedbackcoefficientX.Thisincreasing ofXhas theeffect",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        310,
			
 
				+                        55,
			
 
				+                        564,
			
 
				+                        371
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                314,
			
 
				+                                55,
			
 
				+                                538,
			
 
				+                                305
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        314,
			
 
				+                                        55,
			
 
				+                                        538,
			
 
				+                                        305
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                314,
			
 
				+                                                55,
			
 
				+                                                538,
			
 
				+                                                305
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999905824661255,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                310,
			
 
				+                                311,
			
 
				+                                564,
			
 
				+                                371
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        313,
			
 
				+                                        562,
			
 
				+                                        322
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                313,
			
 
				+                                                562,
			
 
				+                                                322
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "Figure 3: Performance of temporal matching at different levels",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        325,
			
 
				+                                        561,
			
 
				+                                        334
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                325,
			
 
				+                                                561,
			
 
				+                                                334
			
 
				+                                            ],
			
 
				+                                            "score": 0.98,
			
 
				+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        336,
			
 
				+                                        563,
			
 
				+                                        347
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                336,
			
 
				+                                                563,
			
 
				+                                                347
			
 
				+                                            ],
			
 
				+                                            "score": 0.99,
			
 
				+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        348,
			
 
				+                                        561,
			
 
				+                                        358
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                348,
			
 
				+                                                561,
			
 
				+                                                358
			
 
				+                                            ],
			
 
				+                                            "score": 0.96,
			
 
				+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        360,
			
 
				+                                        535,
			
 
				+                                        371
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                311,
			
 
				+                                                360,
			
 
				+                                                535,
			
 
				+                                                371
			
 
				+                                            ],
			
 
				+                                            "score": 0.96,
			
 
				+                                            "content": "values of MSE obtained without temporal aggregation.",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "image",
			
 
				+                    "bbox": [
			
 
				+                        310,
			
 
				+                        418,
			
 
				+                        563,
			
 
				+                        666
			
 
				+                    ],
			
 
				+                    "blocks": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                314,
			
 
				+                                418,
			
 
				+                                549,
			
 
				+                                623
			
 
				+                            ],
			
 
				+                            "type": "image_body",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        314,
			
 
				+                                        418,
			
 
				+                                        549,
			
 
				+                                        623
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                314,
			
 
				+                                                418,
			
 
				+                                                549,
			
 
				+                                                623
			
 
				+                                            ],
			
 
				+                                            "score": 0.9999067783355713,
			
 
				+                                            "type": "image",
			
 
				+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                310,
			
 
				+                                630,
			
 
				+                                563,
			
 
				+                                666
			
 
				+                            ],
			
 
				+                            "type": "image_caption",
			
 
				+                            "lines": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        631,
			
 
				+                                        562,
			
 
				+                                        641
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                631,
			
 
				+                                                562,
			
 
				+                                                641
			
 
				+                                            ],
			
 
				+                                            "score": 0.94,
			
 
				+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        644,
			
 
				+                                        561,
			
 
				+                                        652
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                644,
			
 
				+                                                561,
			
 
				+                                                652
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "responding to the smallest mean squared error (MSE)of the",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                },
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        312,
			
 
				+                                        655,
			
 
				+                                        513,
			
 
				+                                        665
			
 
				+                                    ],
			
 
				+                                    "spans": [
			
 
				+                                        {
			
 
				+                                            "bbox": [
			
 
				+                                                312,
			
 
				+                                                655,
			
 
				+                                                513,
			
 
				+                                                665
			
 
				+                                            ],
			
 
				+                                            "score": 0.97,
			
 
				+                                            "content": "disparity estimates for a range of noise strengths.",
			
 
				+                                            "type": "text"
			
 
				+                                        }
			
 
				+                                    ]
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                },
			
 
				+                {
			
 
				+                    "type": "text",
			
 
				+                    "bbox": [
			
 
				+                        311,
			
 
				+                        692,
			
 
				+                        563,
			
 
				+                        725
			
 
				+                    ],
			
 
				+                    "lines": [
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                311,
			
 
				+                                692,
			
 
				+                                563,
			
 
				+                                702
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        692,
			
 
				+                                        562,
			
 
				+                                        702
			
 
				+                                    ],
			
 
				+                                    "score": 0.95,
			
 
				+                                    "content": "of averaging out noise in the per-pixel costs by selecting",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                311,
			
 
				+                                704,
			
 
				+                                563,
			
 
				+                                713
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        704,
			
 
				+                                        562,
			
 
				+                                        713
			
 
				+                                    ],
			
 
				+                                    "score": 0.98,
			
 
				+                                    "content": "matches based more heavily upon the auxiliary cost, which",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        },
			
 
				+                        {
			
 
				+                            "bbox": [
			
 
				+                                311,
			
 
				+                                716,
			
 
				+                                563,
			
 
				+                                725
			
 
				+                            ],
			
 
				+                            "spans": [
			
 
				+                                {
			
 
				+                                    "bbox": [
			
 
				+                                        311,
			
 
				+                                        716,
			
 
				+                                        563,
			
 
				+                                        725
			
 
				+                                    ],
			
 
				+                                    "score": 0.97,
			
 
				+                                    "content": "is essentially a much more stable running average of the cost",
			
 
				+                                    "type": "text"
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    ]
			
 
				+                }
			
 
				+            ]
			
 
				+        }
			
 
				+    ],
			
 
				+    "_parse_type": "ocr",
			
 
				+    "_version_name": "0.7.0b1"
			
 
				+}
			
--- a/tests/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf
+++ b/tests/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf
--- a/tests/test_integrations/test_rag/assets/one_page_with_table_image.pdf
+++ b/tests/test_integrations/test_rag/assets/one_page_with_table_image.pdf
--- a/tests/test_integrations/test_rag/test_api.py
+++ b/tests/test_integrations/test_rag/test_api.py
@@ -0,0 +1,55 @@
 
				+import json
			
 
				+import os
			
 
				+import shutil
			
 
				+import tempfile
			
 
				+
			
 
				+from magic_pdf.integrations.rag.api import DataReader, RagDocumentReader
			
 
				+from magic_pdf.integrations.rag.type import CategoryType
			
 
				+from magic_pdf.integrations.rag.utils import \
			
 
				+    convert_middle_json_to_layout_elements
			
 
				+
			
 
				+
			
 
				+def test_rag_document_reader():
			
 
				+    # setup
			
 
				+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
			
 
				+    os.makedirs(unitest_dir, exist_ok=True)
			
 
				+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
			
 
				+    os.makedirs(temp_output_dir, exist_ok=True)
			
 
				+
			
 
				+    # test
			
 
				+    with open('tests/test_integrations/test_rag/assets/middle.json') as f:
			
 
				+        json_data = json.load(f)
			
 
				+    res = convert_middle_json_to_layout_elements(json_data, temp_output_dir)
			
 
				+
			
 
				+    doc = RagDocumentReader(res)
			
 
				+    assert len(list(iter(doc))) == 1
			
 
				+
			
 
				+    page = list(iter(doc))[0]
			
 
				+    assert len(list(iter(page))) == 10
			
 
				+    assert len(page.get_rel_map()) == 3
			
 
				+
			
 
				+    item = list(iter(page))[0]
			
 
				+    assert item.category_type == CategoryType.text
			
 
				+
			
 
				+    # teardown
			
 
				+    shutil.rmtree(temp_output_dir)
			
 
				+
			
 
				+
			
 
				+def test_data_reader():
			
 
				+    # setup
			
 
				+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
			
 
				+    os.makedirs(unitest_dir, exist_ok=True)
			
 
				+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
			
 
				+    os.makedirs(temp_output_dir, exist_ok=True)
			
 
				+
			
 
				+    # test
			
 
				+    data_reader = DataReader('tests/test_integrations/test_rag/assets', 'ocr',
			
 
				+                             temp_output_dir)
			
 
				+
			
 
				+    assert data_reader.get_documents_count() == 2
			
 
				+    for idx in range(data_reader.get_documents_count()):
			
 
				+        document = data_reader.get_document_result(idx)
			
 
				+        assert document is not None
			
 
				+
			
 
				+    # teardown
			
 
				+    shutil.rmtree(temp_output_dir)
			
--- a/tests/test_integrations/test_rag/test_utils.py
+++ b/tests/test_integrations/test_rag/test_utils.py
@@ -0,0 +1,57 @@
 
				+import json
			
 
				+import os
			
 
				+import shutil
			
 
				+import tempfile
			
 
				+
			
 
				+from magic_pdf.integrations.rag.type import CategoryType
			
 
				+from magic_pdf.integrations.rag.utils import (
			
 
				+    convert_middle_json_to_layout_elements, inference)
			
 
				+
			
 
				+
			
 
				+def test_convert_middle_json_to_layout_elements():
			
 
				+    # setup
			
 
				+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
			
 
				+    os.makedirs(unitest_dir, exist_ok=True)
			
 
				+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
			
 
				+    os.makedirs(temp_output_dir, exist_ok=True)
			
 
				+
			
 
				+    # test
			
 
				+    with open('tests/test_integrations/test_rag/assets/middle.json') as f:
			
 
				+        json_data = json.load(f)
			
 
				+    res = convert_middle_json_to_layout_elements(json_data, temp_output_dir)
			
 
				+
			
 
				+    assert len(res) == 1
			
 
				+    assert len(res[0].layout_dets) == 10
			
 
				+    assert res[0].layout_dets[0].anno_id == 0
			
 
				+    assert res[0].layout_dets[0].category_type == CategoryType.text
			
 
				+    assert len(res[0].extra.element_relation) == 3
			
 
				+
			
 
				+    # teardown
			
 
				+    shutil.rmtree(temp_output_dir)
			
 
				+
			
 
				+
			
 
				+def test_inference():
			
 
				+
			
 
				+    asset_dir = 'tests/test_integrations/test_rag/assets'
			
 
				+    # setup
			
 
				+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
			
 
				+    os.makedirs(unitest_dir, exist_ok=True)
			
 
				+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
			
 
				+    os.makedirs(temp_output_dir, exist_ok=True)
			
 
				+
			
 
				+    # test
			
 
				+    res = inference(
			
 
				+        asset_dir + '/one_page_with_table_image.pdf',
			
 
				+        temp_output_dir,
			
 
				+        'ocr',
			
 
				+    )
			
 
				+
			
 
				+    assert res is not None
			
 
				+    assert len(res) == 1
			
 
				+    assert len(res[0].layout_dets) == 10
			
 
				+    assert res[0].layout_dets[0].anno_id == 0
			
 
				+    assert res[0].layout_dets[0].category_type == CategoryType.text
			
 
				+    assert len(res[0].extra.element_relation) == 3
			
 
				+
			
 
				+    # teardown
			
 
				+    shutil.rmtree(temp_output_dir)
			
--- a/tests/test_tools/test_common.py
+++ b/tests/test_tools/test_common.py
@@ -19,7 +19,12 @@ def test_common_do_parse(method):
 
				     # run
			
 
				     with open("tests/test_tools/assets/common/cli_test_01.pdf", "rb") as f:
			
 
				         bits = f.read()
			
 
				-    do_parse(temp_output_dir, filename, bits, [], method, f_dump_content_list=True)
			
 
				+    do_parse(temp_output_dir,
			
 
				+             filename,
			
 
				+             bits, [],
			
 
				+             method,
			
 
				+             False,
			
 
				+             f_dump_content_list=True)
			
 
				 
			
 
				     # check
			
 
				     base_output_dir = os.path.join(temp_output_dir, f"fake/{method}")