model_init.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. import os
  2. import torch
  3. from loguru import logger
  4. from .model_list import AtomicModel
  5. from ...model.layout.doclayout_yolo import DocLayoutYOLOModel
  6. from ...model.mfd.yolo_v8 import YOLOv8MFDModel
  7. from ...model.mfr.unimernet.Unimernet import UnimernetModel
  8. from ...model.ocr.paddleocr2pytorch.pytorch_paddle import PytorchPaddleOCR
  9. from ...model.ori_cls.paddle_ori_cls import PaddleOrientationClsModel
  10. from ...model.table.cls.paddle_table_cls import PaddleTableClsModel
  11. from ...model.table.rec.rapid_table import RapidTableModel
  12. from ...model.table.rec.unet_table.main import UnetTableModel
  13. from ...utils.enum_class import ModelPath
  14. from ...utils.models_download_utils import auto_download_and_get_model_root_path
  15. def img_orientation_cls_model_init():
  16. atom_model_manager = AtomModelSingleton()
  17. ocr_engine = atom_model_manager.get_atom_model(
  18. atom_model_name=AtomicModel.OCR,
  19. det_db_box_thresh=0.5,
  20. det_db_unclip_ratio=1.6,
  21. lang="ch_lite",
  22. enable_merge_det_boxes=False
  23. )
  24. cls_model = PaddleOrientationClsModel(ocr_engine)
  25. return cls_model
  26. def table_cls_model_init():
  27. return PaddleTableClsModel()
  28. def wired_table_model_init(lang=None):
  29. atom_model_manager = AtomModelSingleton()
  30. ocr_engine = atom_model_manager.get_atom_model(
  31. atom_model_name=AtomicModel.OCR,
  32. det_db_box_thresh=0.5,
  33. det_db_unclip_ratio=1.6,
  34. lang=lang,
  35. enable_merge_det_boxes=False
  36. )
  37. table_model = UnetTableModel(ocr_engine)
  38. return table_model
  39. def wireless_table_model_init(lang=None):
  40. atom_model_manager = AtomModelSingleton()
  41. ocr_engine = atom_model_manager.get_atom_model(
  42. atom_model_name=AtomicModel.OCR,
  43. det_db_box_thresh=0.5,
  44. det_db_unclip_ratio=1.6,
  45. lang=lang,
  46. enable_merge_det_boxes=False
  47. )
  48. table_model = RapidTableModel(ocr_engine)
  49. return table_model
  50. def mfd_model_init(weight, device='cpu'):
  51. if str(device).startswith('npu'):
  52. device = torch.device(device)
  53. mfd_model = YOLOv8MFDModel(weight, device)
  54. return mfd_model
  55. def mfr_model_init(weight_dir, device='cpu'):
  56. mfr_model = UnimernetModel(weight_dir, device)
  57. return mfr_model
  58. def doclayout_yolo_model_init(weight, device='cpu'):
  59. if str(device).startswith('npu'):
  60. device = torch.device(device)
  61. model = DocLayoutYOLOModel(weight, device)
  62. return model
  63. def ocr_model_init(det_db_box_thresh=0.3,
  64. lang=None,
  65. det_db_unclip_ratio=1.8,
  66. enable_merge_det_boxes=True
  67. ):
  68. if lang is not None and lang != '':
  69. model = PytorchPaddleOCR(
  70. det_db_box_thresh=det_db_box_thresh,
  71. lang=lang,
  72. use_dilation=True,
  73. det_db_unclip_ratio=det_db_unclip_ratio,
  74. enable_merge_det_boxes=enable_merge_det_boxes,
  75. )
  76. else:
  77. model = PytorchPaddleOCR(
  78. det_db_box_thresh=det_db_box_thresh,
  79. use_dilation=True,
  80. det_db_unclip_ratio=det_db_unclip_ratio,
  81. enable_merge_det_boxes=enable_merge_det_boxes,
  82. )
  83. return model
  84. class AtomModelSingleton:
  85. _instance = None
  86. _models = {}
  87. def __new__(cls, *args, **kwargs):
  88. if cls._instance is None:
  89. cls._instance = super().__new__(cls)
  90. return cls._instance
  91. def get_atom_model(self, atom_model_name: str, **kwargs):
  92. lang = kwargs.get('lang', None)
  93. if atom_model_name in [AtomicModel.WiredTable, AtomicModel.WirelessTable]:
  94. key = (atom_model_name, lang)
  95. elif atom_model_name in [AtomicModel.OCR]:
  96. key = (atom_model_name,
  97. kwargs.get('det_db_box_thresh', 0.3),
  98. lang, kwargs.get('det_db_unclip_ratio', 1.8),
  99. kwargs.get('enable_merge_det_boxes', True)
  100. )
  101. else:
  102. key = atom_model_name
  103. if key not in self._models:
  104. self._models[key] = atom_model_init(model_name=atom_model_name, **kwargs)
  105. return self._models[key]
  106. def atom_model_init(model_name: str, **kwargs):
  107. atom_model = None
  108. if model_name == AtomicModel.Layout:
  109. atom_model = doclayout_yolo_model_init(
  110. kwargs.get('doclayout_yolo_weights'),
  111. kwargs.get('device')
  112. )
  113. elif model_name == AtomicModel.MFD:
  114. atom_model = mfd_model_init(
  115. kwargs.get('mfd_weights'),
  116. kwargs.get('device')
  117. )
  118. elif model_name == AtomicModel.MFR:
  119. atom_model = mfr_model_init(
  120. kwargs.get('mfr_weight_dir'),
  121. kwargs.get('device')
  122. )
  123. elif model_name == AtomicModel.OCR:
  124. atom_model = ocr_model_init(
  125. kwargs.get('det_db_box_thresh', 0.3),
  126. kwargs.get('lang'),
  127. kwargs.get('det_db_unclip_ratio', 1.8),
  128. kwargs.get('enable_merge_det_boxes', True)
  129. )
  130. elif model_name == AtomicModel.WirelessTable:
  131. atom_model = wireless_table_model_init(
  132. kwargs.get('lang'),
  133. )
  134. elif model_name == AtomicModel.WiredTable:
  135. atom_model = wired_table_model_init(
  136. kwargs.get('lang'),
  137. )
  138. elif model_name == AtomicModel.TableCls:
  139. atom_model = table_cls_model_init()
  140. elif model_name == AtomicModel.ImgOrientationCls:
  141. atom_model = img_orientation_cls_model_init()
  142. else:
  143. logger.error('model name not allow')
  144. exit(1)
  145. if atom_model is None:
  146. logger.error('model init failed')
  147. exit(1)
  148. else:
  149. return atom_model
  150. class MineruPipelineModel:
  151. def __init__(self, **kwargs):
  152. self.formula_config = kwargs.get('formula_config')
  153. self.apply_formula = self.formula_config.get('enable', True)
  154. self.table_config = kwargs.get('table_config')
  155. self.apply_table = self.table_config.get('enable', True)
  156. self.lang = kwargs.get('lang', None)
  157. self.device = kwargs.get('device', 'cpu')
  158. logger.info(
  159. 'DocAnalysis init, this may take some times......'
  160. )
  161. atom_model_manager = AtomModelSingleton()
  162. if self.apply_formula:
  163. # 初始化公式检测模型
  164. self.mfd_model = atom_model_manager.get_atom_model(
  165. atom_model_name=AtomicModel.MFD,
  166. mfd_weights=str(
  167. os.path.join(auto_download_and_get_model_root_path(ModelPath.yolo_v8_mfd), ModelPath.yolo_v8_mfd)
  168. ),
  169. device=self.device,
  170. )
  171. # 初始化公式解析模型
  172. mfr_weight_dir = os.path.join(auto_download_and_get_model_root_path(ModelPath.unimernet_small), ModelPath.unimernet_small)
  173. self.mfr_model = atom_model_manager.get_atom_model(
  174. atom_model_name=AtomicModel.MFR,
  175. mfr_weight_dir=mfr_weight_dir,
  176. device=self.device,
  177. )
  178. # 初始化layout模型
  179. self.layout_model = atom_model_manager.get_atom_model(
  180. atom_model_name=AtomicModel.Layout,
  181. doclayout_yolo_weights=str(
  182. os.path.join(auto_download_and_get_model_root_path(ModelPath.doclayout_yolo), ModelPath.doclayout_yolo)
  183. ),
  184. device=self.device,
  185. )
  186. # 初始化ocr
  187. self.ocr_model = atom_model_manager.get_atom_model(
  188. atom_model_name=AtomicModel.OCR,
  189. det_db_box_thresh=0.3,
  190. lang=self.lang
  191. )
  192. # init table model
  193. if self.apply_table:
  194. self.wired_table_model = atom_model_manager.get_atom_model(
  195. atom_model_name=AtomicModel.WiredTable,
  196. lang=self.lang,
  197. )
  198. self.wireless_table_model = atom_model_manager.get_atom_model(
  199. atom_model_name=AtomicModel.WirelessTable,
  200. lang=self.lang,
  201. )
  202. self.table_cls_model = atom_model_manager.get_atom_model(
  203. atom_model_name=AtomicModel.TableCls,
  204. )
  205. self.img_orientation_cls_model = atom_model_manager.get_atom_model(
  206. atom_model_name=AtomicModel.ImgOrientationCls,
  207. lang=self.lang,
  208. )
  209. logger.info('DocAnalysis init done!')