__init__.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. from typing import Callable
  2. from abc import ABC, abstractmethod
  3. from magic_pdf.data.data_reader_writer import DataWriter
  4. from magic_pdf.data.dataset import Dataset
  5. from magic_pdf.operators.pipes import PipeResult
# Module-level flags. Neither name is referenced in the visible part of this
# module.
# NOTE(review): presumably read by other modules to decide whether the built-in
# model is used and in which mode it runs -- confirm against the callers.
__use_inside_model__ = True
__model_mode__ = "full"
  8. class InferenceResultBase(ABC):
  9. @abstractmethod
  10. def __init__(self, inference_results: list, dataset: Dataset):
  11. """Initialized method.
  12. Args:
  13. inference_results (list): the inference result generated by model
  14. dataset (Dataset): the dataset related with model inference result
  15. """
  16. pass
  17. @abstractmethod
  18. def draw_model(self, file_path: str) -> None:
  19. """Draw model inference result.
  20. Args:
  21. file_path (str): the output file path
  22. """
  23. pass
  24. @abstractmethod
  25. def dump_model(self, writer: DataWriter, file_path: str):
  26. """Dump model inference result to file.
  27. Args:
  28. writer (DataWriter): writer handle
  29. file_path (str): the location of target file
  30. """
  31. pass
  32. @abstractmethod
  33. def get_infer_res(self):
  34. """Get the inference result.
  35. Returns:
  36. list: the inference result generated by model
  37. """
  38. pass
  39. @abstractmethod
  40. def apply(self, proc: Callable, *args, **kwargs):
  41. """Apply callable method which.
  42. Args:
  43. proc (Callable): invoke proc as follows:
  44. proc(inference_result, *args, **kwargs)
  45. Returns:
  46. Any: return the result generated by proc
  47. """
  48. pass
  49. @abstractmethod
  50. def pipe_auto_mode(
  51. self,
  52. imageWriter: DataWriter,
  53. start_page_id=0,
  54. end_page_id=None,
  55. debug_mode=False,
  56. lang=None,
  57. ) -> PipeResult:
  58. """Post-proc the model inference result.
  59. step1: classify the dataset type
  60. step2: based the result of step1, using `pipe_txt_mode` or `pipe_ocr_mode`
  61. Args:
  62. imageWriter (DataWriter): the image writer handle
  63. start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process
  64. end_page_id (int, optional): Defaults to the last page index of dataset. Let user select some pages He/She want to process
  65. debug_mode (bool, optional): Defaults to False. will dump more log if enabled
  66. lang (str, optional): Defaults to None.
  67. Returns:
  68. PipeResult: the result
  69. """
  70. pass
  71. @abstractmethod
  72. def pipe_txt_mode(
  73. self,
  74. imageWriter: DataWriter,
  75. start_page_id=0,
  76. end_page_id=None,
  77. debug_mode=False,
  78. lang=None,
  79. ) -> PipeResult:
  80. """Post-proc the model inference result, Extract the text using the
  81. third library, such as `pymupdf`
  82. Args:
  83. imageWriter (DataWriter): the image writer handle
  84. start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process
  85. end_page_id (int, optional): Defaults to the last page index of dataset. Let user select some pages He/She want to process
  86. debug_mode (bool, optional): Defaults to False. will dump more log if enabled
  87. lang (str, optional): Defaults to None.
  88. Returns:
  89. PipeResult: the result
  90. """
  91. pass
  92. @abstractmethod
  93. def pipe_ocr_mode(
  94. self,
  95. imageWriter: DataWriter,
  96. start_page_id=0,
  97. end_page_id=None,
  98. debug_mode=False,
  99. lang=None,
  100. ) -> PipeResult:
  101. pass