# __init__.py (3.6 KB)
  1. from typing import Callable
  2. from abc import ABC, abstractmethod
  3. from magic_pdf.data.data_reader_writer import DataWriter
  4. from magic_pdf.data.dataset import Dataset
  5. from magic_pdf.pipe.operators import PipeResult
  6. __use_inside_model__ = True
  7. __model_mode__ = "full"
  8. class InferenceResultBase(ABC):
  9. @abstractmethod
  10. def __init__(self, inference_results: list, dataset: Dataset):
  11. """Initialized method.
  12. Args:
  13. inference_results (list): the inference result generated by model
  14. dataset (Dataset): the dataset related with model inference result
  15. """
  16. self._infer_res = inference_results
  17. self._dataset = dataset
  18. @abstractmethod
  19. def draw_model(self, file_path: str) -> None:
  20. """Draw model inference result.
  21. Args:
  22. file_path (str): the output file path
  23. """
  24. pass
  25. @abstractmethod
  26. def dump_model(self, writer: DataWriter, file_path: str):
  27. """Dump model inference result to file.
  28. Args:
  29. writer (DataWriter): writer handle
  30. file_path (str): the location of target file
  31. """
  32. pass
  33. @abstractmethod
  34. def get_infer_res(self):
  35. """Get the inference result.
  36. Returns:
  37. list: the inference result generated by model
  38. """
  39. pass
  40. @abstractmethod
  41. def apply(self, proc: Callable, *args, **kwargs):
  42. """Apply callable method which.
  43. Args:
  44. proc (Callable): invoke proc as follows:
  45. proc(inference_result, *args, **kwargs)
  46. Returns:
  47. Any: return the result generated by proc
  48. """
  49. pass
  50. @abstractmethod
  51. def pipe_auto_mode(
  52. self,
  53. imageWriter: DataWriter,
  54. start_page_id=0,
  55. end_page_id=None,
  56. debug_mode=False,
  57. lang=None,
  58. ) -> PipeResult:
  59. """Post-proc the model inference result.
  60. step1: classify the dataset type
  61. step2: based the result of step1, using `pipe_txt_mode` or `pipe_ocr_mode`
  62. Args:
  63. imageWriter (DataWriter): the image writer handle
  64. start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process
  65. end_page_id (int, optional): Defaults to the last page index of dataset. Let user select some pages He/She want to process
  66. debug_mode (bool, optional): Defaults to False. will dump more log if enabled
  67. lang (str, optional): Defaults to None.
  68. Returns:
  69. PipeResult: the result
  70. """
  71. pass
  72. @abstractmethod
  73. def pipe_txt_mode(
  74. self,
  75. imageWriter: DataWriter,
  76. start_page_id=0,
  77. end_page_id=None,
  78. debug_mode=False,
  79. lang=None,
  80. ) -> PipeResult:
  81. """Post-proc the model inference result, Extract the text using the
  82. third library, such as `pymupdf`
  83. Args:
  84. imageWriter (DataWriter): the image writer handle
  85. start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process
  86. end_page_id (int, optional): Defaults to the last page index of dataset. Let user select some pages He/She want to process
  87. debug_mode (bool, optional): Defaults to False. will dump more log if enabled
  88. lang (str, optional): Defaults to None.
  89. Returns:
  90. PipeResult: the result
  91. """
  92. pass
  93. @abstractmethod
  94. def pipe_ocr_mode(
  95. self,
  96. imageWriter: DataWriter,
  97. start_page_id=0,
  98. end_page_id=None,
  99. debug_mode=False,
  100. lang=None,
  101. ) -> PipeResult:
  102. pass