magic_model.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. class MagicModel():
  2. """
  3. 每个函数没有得到元素的时候返回空list
  4. """
  5. def __fix_axis():
  6. # TODO 计算
  7. self.__model_list = xx
  8. def __init__(model_list:list, page:Page):
  9. self.__model_list = model_list
  10. self.__fix_axis()
  11. self.__page = page
  12. def get_imgs(self, page_no:int): # @许瑞
  13. return_lst = []
  14. image_block = {
  15. }
  16. img = {
  17. "bbox":[x0,y0,x1,y1]
  18. }
  19. img_caption = {
  20. "bbox":[x0,y0,x1,y1],
  21. "text":"",
  22. }
  23. image_block['bbox'] = [x0, y0, x1, y1]# 计算出来
  24. image_block['img_body'] = img
  25. image_blcok['img_caption'] = img_caption
  26. return [image_block,]
  27. def get_tables(self, page_no:int) ->list: # 3个坐标, caption, table主体,table-note
  28. pass # 许瑞, 结构和image一样
  29. def get_equations(self, page_no:int)->list: # 有坐标,也有字
  30. return inline_equations, interline_equations # @凯文
  31. def get_discarded(self, page_no:int)->list: # 自研模型,只有坐标
  32. pass # @凯文
  33. def get_text_blocks(self, page_no:int)->list: # 自研模型搞的,只有坐标,没有字
  34. pass # @凯文
  35. def get_title_blocks(self, page_no:int)->list: # 自研模型,只有坐标,没字
  36. pass # @凯文
  37. def get_ocr_text(self, page_no:int)->list: # paddle 搞的,有字也有坐标
  38. pass # @小蒙
  39. def get_ocr_spans(self, page_no:int)->list:
  40. pass # @小蒙