# DocLayoutYOLO.py
  1. from doclayout_yolo import YOLOv10
  2. class DocLayoutYOLOModel(object):
  3. def __init__(self, weight, device):
  4. self.model = YOLOv10(weight)
  5. if not device.startswith("cpu"):
  6. self.model.half()
  7. self.device = device
  8. def predict(self, image):
  9. layout_res = []
  10. doclayout_yolo_res = self.model.predict(
  11. image,
  12. imgsz=1280,
  13. conf=0.10,
  14. iou=0.45,
  15. verbose=False, device=self.device
  16. )[0]
  17. for xyxy, conf, cla in zip(
  18. doclayout_yolo_res.boxes.xyxy.cpu(),
  19. doclayout_yolo_res.boxes.conf.cpu(),
  20. doclayout_yolo_res.boxes.cls.cpu(),
  21. ):
  22. xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
  23. new_item = {
  24. "category_id": int(cla.item()),
  25. "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
  26. "score": round(float(conf.item()), 3),
  27. }
  28. layout_res.append(new_item)
  29. return layout_res
  30. def batch_predict(self, images: list, batch_size: int) -> list:
  31. images_layout_res = []
  32. for index in range(0, len(images), batch_size):
  33. doclayout_yolo_res = [
  34. image_res.cpu()
  35. for image_res in self.model.predict(
  36. images[index : index + batch_size],
  37. imgsz=1280,
  38. conf=0.10,
  39. iou=0.45,
  40. verbose=False,
  41. device=self.device,
  42. )
  43. ]
  44. for image_res in doclayout_yolo_res:
  45. layout_res = []
  46. for xyxy, conf, cla in zip(
  47. image_res.boxes.xyxy,
  48. image_res.boxes.conf,
  49. image_res.boxes.cls,
  50. ):
  51. xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
  52. new_item = {
  53. "category_id": int(cla.item()),
  54. "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
  55. "score": round(float(conf.item()), 3),
  56. }
  57. layout_res.append(new_item)
  58. images_layout_res.append(layout_res)
  59. return images_layout_res