pp_structure_v2.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import random
  2. from loguru import logger
  3. from paddleocr import PPStructure
  4. def region_to_bbox(region):
  5. x0 = region[0][0]
  6. y0 = region[0][1]
  7. x1 = region[2][0]
  8. y1 = region[2][1]
  9. return [x0, y0, x1, y1]
  10. class CustomPaddleModel:
  11. def __init__(self, ocr: bool = False, show_log: bool = False):
  12. self.model = PPStructure(table=False, ocr=ocr, show_log=show_log)
  13. def __call__(self, img):
  14. result = self.model(img)
  15. spans = []
  16. for line in result:
  17. line.pop("img")
  18. """
  19. 为paddle输出适配type no.
  20. title: 0 # 标题
  21. text: 1 # 文本
  22. header: 2 # abandon
  23. footer: 2 # abandon
  24. reference: 1 # 文本 or abandon
  25. equation: 8 # 行间公式 block
  26. equation: 14 # 行间公式 text
  27. figure: 3 # 图片
  28. figure_caption: 4 # 图片描述
  29. table: 5 # 表格
  30. table_caption: 6 # 表格描述
  31. """
  32. if line["type"] == "title":
  33. line["category_id"] = 0
  34. elif line["type"] in ["text", "reference"]:
  35. line["category_id"] = 1
  36. elif line["type"] == "figure":
  37. line["category_id"] = 3
  38. elif line["type"] == "figure_caption":
  39. line["category_id"] = 4
  40. elif line["type"] == "table":
  41. line["category_id"] = 5
  42. elif line["type"] == "table_caption":
  43. line["category_id"] = 6
  44. elif line["type"] == "equation":
  45. line["category_id"] = 8
  46. elif line["type"] in ["header", "footer"]:
  47. line["category_id"] = 2
  48. else:
  49. logger.warning(f"unknown type: {line['type']}")
  50. # 兼容不输出score的paddleocr版本
  51. if line.get("score") is None:
  52. line["score"] = 0.5 + random.random() * 0.5
  53. res = line.pop("res", None)
  54. if res is not None and len(res) > 0:
  55. for span in res:
  56. new_span = {
  57. "category_id": 15,
  58. "bbox": region_to_bbox(span["text_region"]),
  59. "score": span["confidence"],
  60. "text": span["text"],
  61. }
  62. spans.append(new_span)
  63. if len(spans) > 0:
  64. result.extend(spans)
  65. return result