| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- from doclayout_yolo import YOLOv10
class DocLayoutYOLOModel:
    """Document-layout detector backed by a ``YOLOv10`` model.

    Each detection is reported as a dict with three keys:

    * ``"category_id"`` -- the predicted class index as an ``int``.
    * ``"poly"`` -- the axis-aligned box as eight integers, listing the
      corners in the order ``(xmin, ymin)``, ``(xmax, ymin)``,
      ``(xmax, ymax)``, ``(xmin, ymax)``.
    * ``"score"`` -- the detection confidence rounded to three decimals.
    """

    # Inference settings shared by predict() and batch_predict().
    IMGSZ = 1024
    CONF = 0.25
    IOU = 0.45

    def __init__(self, weight, device):
        """Load the model from ``weight`` and remember the inference device.

        Args:
            weight: Passed unchanged to ``YOLOv10(...)``.
            device: Passed unchanged as ``device=`` on every prediction call.
        """
        self.model = YOLOv10(weight)
        self.device = device

    def _run_model(self, source):
        """Run the model on ``source`` with the shared inference settings."""
        return self.model.predict(
            source,
            imgsz=self.IMGSZ,
            conf=self.CONF,
            iou=self.IOU,
            verbose=True,
            device=self.device,
        )

    @staticmethod
    def _parse_result(result) -> list:
        """Convert the boxes of one per-image result into detection dicts."""
        boxes = result.boxes
        layout_res = []
        # Move each tensor to the CPU once, before the per-box loop.
        for xyxy, conf, cla in zip(
            boxes.xyxy.cpu(), boxes.conf.cpu(), boxes.cls.cpu()
        ):
            # int() truncates the coordinates toward zero.
            xmin, ymin, xmax, ymax = (int(p.item()) for p in xyxy)
            layout_res.append(
                {
                    "category_id": int(cla.item()),
                    "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
                    "score": round(float(conf.item()), 3),
                }
            )
        return layout_res

    def predict(self, image) -> list:
        """Detect layout regions in a single image.

        Args:
            image: The input forwarded to the model's ``predict`` call.

        Returns:
            A list of detection dicts (see the class docstring).
        """
        # The model returns a sequence of per-image results; one image in,
        # so only the first entry is relevant.
        return self._parse_result(self._run_model(image)[0])

    def batch_predict(self, images: list, batch_size: int) -> list:
        """Detect layout regions in many images, ``batch_size`` at a time.

        Args:
            images: The images to process.
            batch_size: Maximum number of images per model call; must be >= 1.

        Returns:
            One list of detection dicts per input image, in input order.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            # A negative step would make range() empty and silently drop
            # every image; zero would raise an unrelated range() error.
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        images_layout_res = []
        for index in range(0, len(images), batch_size):
            batch_results = self._run_model(images[index : index + batch_size])
            # The model returns a plain sequence of per-image results, so the
            # CPU transfer is done per result inside _parse_result rather than
            # on the sequence itself (which has no .cpu()).
            images_layout_res.extend(
                self._parse_result(image_res) for image_res in batch_results
            )
        return images_layout_res
|