__init__.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. from abc import ABC, abstractmethod
  2. from typing import Callable
  3. from magic_pdf.data.data_reader_writer import DataWriter
  4. from magic_pdf.data.dataset import Dataset
  5. from magic_pdf.operators.pipes import PipeResult
  6. class InferenceResultBase(ABC):
  7. @abstractmethod
  8. def __init__(self, inference_results: list, dataset: Dataset):
  9. """Initialized method.
  10. Args:
  11. inference_results (list): the inference result generated by model
  12. dataset (Dataset): the dataset related with model inference result
  13. """
  14. pass
  15. @abstractmethod
  16. def draw_model(self, file_path: str) -> None:
  17. """Draw model inference result.
  18. Args:
  19. file_path (str): the output file path
  20. """
  21. pass
  22. @abstractmethod
  23. def dump_model(self, writer: DataWriter, file_path: str):
  24. """Dump model inference result to file.
  25. Args:
  26. writer (DataWriter): writer handle
  27. file_path (str): the location of target file
  28. """
  29. pass
  30. @abstractmethod
  31. def get_infer_res(self):
  32. """Get the inference result.
  33. Returns:
  34. list: the inference result generated by model
  35. """
  36. pass
  37. @abstractmethod
  38. def apply(self, proc: Callable, *args, **kwargs):
  39. """Apply callable method which.
  40. Args:
  41. proc (Callable): invoke proc as follows:
  42. proc(inference_result, *args, **kwargs)
  43. Returns:
  44. Any: return the result generated by proc
  45. """
  46. pass
  47. def pipe_txt_mode(
  48. self,
  49. imageWriter: DataWriter,
  50. start_page_id=0,
  51. end_page_id=None,
  52. debug_mode=False,
  53. lang=None,
  54. ) -> PipeResult:
  55. """Post-proc the model inference result, Extract the text using the
  56. third library, such as `pymupdf`
  57. Args:
  58. imageWriter (DataWriter): the image writer handle
  59. start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process
  60. end_page_id (int, optional): Defaults to the last page index of dataset. Let user select some pages He/She want to process
  61. debug_mode (bool, optional): Defaults to False. will dump more log if enabled
  62. lang (str, optional): Defaults to None.
  63. Returns:
  64. PipeResult: the result
  65. """
  66. pass
  67. @abstractmethod
  68. def pipe_ocr_mode(
  69. self,
  70. imageWriter: DataWriter,
  71. start_page_id=0,
  72. end_page_id=None,
  73. debug_mode=False,
  74. lang=None,
  75. ) -> PipeResult:
  76. pass