| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- import os
- from typing import List, Union
- from tqdm import tqdm
- from ultralytics import YOLO
- import numpy as np
- from PIL import Image, ImageDraw
- from mineru.utils.enum_class import ModelPath
- from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
class YOLOv8MFDModel:
    """Wrapper around an Ultralytics ``YOLO`` model for MFD prediction.

    NOTE(review): "MFD" presumably stands for math formula detection (the
    visualisation code refers to detections as formulas) -- confirm.

    The wrapper stores the inference settings once and forwards them to
    ``YOLO.predict`` on every call.
    """

    def __init__(
        self,
        weight: str,
        device: str = "cpu",
        imgsz: int = 1888,
        conf: float = 0.25,
        iou: float = 0.45,
    ):
        """Load the model from ``weight`` and move it to ``device``.

        Args:
            weight: Path of the weights file handed to ``YOLO``.
            device: Device string the model is moved to and predicts on.
            imgsz: ``imgsz`` value forwarded to ``YOLO.predict``.
            conf: ``conf`` (confidence threshold) forwarded to ``YOLO.predict``.
            iou: ``iou`` threshold forwarded to ``YOLO.predict``.
        """
        self.model = YOLO(weight).to(device)
        self.device = device
        self.imgsz = imgsz
        self.conf = conf
        self.iou = iou

    def _run_predict(
        self,
        inputs: "Union[np.ndarray, Image.Image, List]",
        is_batch: bool = False,
    ):
        """Run ``YOLO.predict`` with the stored settings and move results to CPU.

        Args:
            inputs: A single image or a list of images.
            is_batch: When true, return one result per input; otherwise
                return only the first result.

        Returns:
            A list of CPU results when ``is_batch`` is true, else the single
            CPU result for the first (only) input.
        """
        preds = self.model.predict(
            inputs,
            imgsz=self.imgsz,
            conf=self.conf,
            iou=self.iou,
            verbose=False,
            device=self.device,
        )
        if is_batch:
            return [pred.cpu() for pred in preds]
        return preds[0].cpu()

    def predict(self, image: "Union[np.ndarray, Image.Image]"):
        """Predict on a single image and return its (CPU) result object."""
        return self._run_predict(image)

    def batch_predict(
        self,
        images: "List[Union[np.ndarray, Image.Image]]",
        batch_size: int = 4,
    ) -> List:
        """Predict on ``images`` in chunks of ``batch_size`` with a progress bar.

        Args:
            images: Images to run through the model.
            batch_size: Number of images per ``predict`` call; must be >= 1.

        Returns:
            One CPU result per input image, in input order.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # A negative step would make ``range`` empty and silently drop every
        # image; a zero step only fails with an opaque ``range()`` error.
        if batch_size < 1:
            raise ValueError(
                f"batch_size must be a positive integer, got {batch_size}"
            )

        results = []
        with tqdm(total=len(images), desc="MFD Predict") as pbar:
            for start in range(0, len(images), batch_size):
                batch = images[start:start + batch_size]
                results.extend(self._run_predict(batch, is_batch=True))
                pbar.update(len(batch))
        return results

    def visualize(
        self,
        image: "Union[np.ndarray, Image.Image]",
        results,
    ) -> "Image.Image":
        """Draw the detected boxes and their scores onto a copy of ``image``.

        Args:
            image: The image the prediction was made on.
            results: A single result object as returned by ``predict``
                (it must expose ``boxes.xyxy``, ``boxes.conf`` and
                ``boxes.cls``) -- not a list of results.

        Returns:
            A PIL image with a red rectangle and a score label per detection.
            The caller's ``image`` is left untouched.
        """
        if isinstance(image, np.ndarray):
            canvas = Image.fromarray(image)
        else:
            # Draw on a copy so a caller-supplied PIL image is not modified.
            canvas = image.copy()

        boxes = results.boxes
        draw = ImageDraw.Draw(canvas)
        for xyxy, conf, cla in zip(
            boxes.xyxy.cpu(), boxes.conf.cpu(), boxes.cls.cpu()
        ):
            xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
            # NOTE(review): the +13 offset presumably maps the model's class
            # index into the project's category-id space -- confirm.
            category_id = 13 + int(cla.item())
            score = round(float(conf.item()), 2)
            print(
                f"Detected box: {xmin}, {ymin}, {xmax}, {ymax}, Category ID: {category_id}, Score: {score}")
            # Draw the bounding box on the image with PIL.
            draw.rectangle([xmin, ymin, xmax, ymax], outline="red", width=2)
            # Draw the confidence score next to the box.
            draw.text((xmax + 10, ymin + 10), f"{score:.2f}", fill="red")
        return canvas
if __name__ == '__main__':
    # Manual smoke test: run the model on one image and display the result.
    # Usage: python <this file> [image_path] [device]
    import sys

    # Both arguments are optional; the defaults are the original hard-coded
    # developer values.
    image_path = sys.argv[1] if len(sys.argv) > 1 else r"C:\Users\zhaoxiaomeng\Downloads\下载1.jpg"
    device = sys.argv[2] if len(sys.argv) > 2 else 'cuda'

    yolo_v8_mfd_weights = os.path.join(
        auto_download_and_get_model_root_path(ModelPath.yolo_v8_mfd),
        ModelPath.yolo_v8_mfd,
    )
    model = YOLOv8MFDModel(
        weight=yolo_v8_mfd_weights,
        device=device,
    )

    # Keep the file open only while it is being used.
    with Image.open(image_path) as image:
        results = model.predict(image)
        annotated = model.visualize(image, results)
        annotated.show()  # display the image
|