Browse Source

add examples for yolo anchor clustering

will-jl944 4 years ago
parent
commit
9b017a0ad6
1 changed files with 138 additions and 0 deletions
  1. 138 0
      docs/apis/datasets.md

+ 138 - 0
docs/apis/datasets.md

@@ -70,6 +70,77 @@ cluster_yolo_anchor(num_anchors, image_size, cache=True, cache_path=None, iters=
 > > * **gen_iters** (int):遗传算法迭代次数。
 > > * **thresh** (float):anchor尺寸与真值框尺寸之间比例的阈值。
 
+**代码示例**
+```python
+import paddlex as pdx
+from paddlex import transforms as T
+
+# 下载和解压昆虫检测数据集
+dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(dataset, path='./')
+
+# 定义训练和验证时的transforms
+# API说明:https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/apis/transforms/transforms.md
+train_transforms = T.Compose([
+    T.MixupImage(mixup_epoch=-1), T.RandomDistort(),
+    T.RandomExpand(im_padding_value=[123.675, 116.28, 103.53]), T.RandomCrop(),
+    T.RandomHorizontalFlip(), T.BatchRandomResize(
+        target_sizes=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608],
+        interp='RANDOM'), T.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+])
+
+eval_transforms = T.Compose([
+    T.Resize(
+        target_size=608, interp='CUBIC'), T.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+])
+
+# 定义训练和验证所用的数据集
+# API说明:https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/apis/datasets.md
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/train_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+
+eval_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/val_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=eval_transforms,
+    shuffle=False)
+
+# 在训练集上聚类生成9个anchor
+anchors = train_dataset.cluster_yolo_anchor(num_anchors=9, image_size=608)
+anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+
+# 初始化模型,并进行训练
+# 可使用VisualDL查看训练指标,参考https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/train/visualdl.md
+num_classes = len(train_dataset.labels)
+model = pdx.det.PPYOLO(num_classes=num_classes,
+                       backbone='ResNet50_vd_dcn',
+                       anchors=anchors,
+                       anchor_masks=anchor_masks)
+
+# API说明:https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/apis/models/detection.md
+# 各参数介绍与调整说明:https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/parameters.md
+model.train(
+    num_epochs=200,
+    train_dataset=train_dataset,
+    train_batch_size=8,
+    eval_dataset=eval_dataset,
+    pretrain_weights='COCO',
+    learning_rate=0.005 / 12,
+    warmup_steps=500,
+    warmup_start_lr=0.0,
+    save_interval_epochs=5,
+    lr_decay_epochs=[85, 135],
+    save_dir='output/ppyolo_r50vd_dcn',
+    use_vdl=True)
+```
+
 ## <h2 id="3">paddlex.datasets.CocoDetection</h2>
 > **用于实例分割/目标检测模型**  
 ```python
@@ -112,6 +183,73 @@ cluster_yolo_anchor(num_anchors, image_size, cache=True, cache_path=None, iters=
 > > * **gen_iters** (int):遗传算法迭代次数。
 > > * **thresh** (float):anchor尺寸与真值框尺寸之间比例的阈值。
 
+**代码示例**
+```python
+import paddlex as pdx
+from paddlex import transforms as T
+
+# 下载和解压小度熊分拣数据集(COCO格式标注)
+dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
+pdx.utils.download_and_decompress(dataset, path='./')
+
+# 定义训练和验证时的transforms
+# API说明:https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/apis/transforms/transforms.md
+train_transforms = T.Compose([
+    T.MixupImage(mixup_epoch=-1), T.RandomDistort(),
+    T.RandomExpand(im_padding_value=[123.675, 116.28, 103.53]), T.RandomCrop(),
+    T.RandomHorizontalFlip(), T.BatchRandomResize(
+        target_sizes=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608],
+        interp='RANDOM'), T.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+])
+
+eval_transforms = T.Compose([
+    T.Resize(
+        target_size=608, interp='CUBIC'), T.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+])
+
+# 定义训练和验证所用的数据集
+# API说明:https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/apis/datasets.md
+train_dataset = pdx.datasets.CocoDetection(
+    data_dir='xiaoduxiong_ins_det/JPEGImages',
+    ann_file='xiaoduxiong_ins_det/train.json',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.CocoDetection(
+    data_dir='xiaoduxiong_ins_det/JPEGImages',
+    ann_file='xiaoduxiong_ins_det/val.json',
+    transforms=eval_transforms)
+
+# 在训练集上聚类生成9个anchor
+anchors = train_dataset.cluster_yolo_anchor(num_anchors=9, image_size=608)
+anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+
+# 初始化模型,并进行训练
+# 可使用VisualDL查看训练指标,参考https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/train/visualdl.md
+num_classes = len(train_dataset.labels)
+model = pdx.det.PPYOLO(num_classes=num_classes,
+                       backbone='ResNet50_vd_dcn',
+                       anchors=anchors,
+                       anchor_masks=anchor_masks)
+
+# API说明:https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/apis/models/detection.md
+# 各参数介绍与调整说明:https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/parameters.md
+model.train(
+    num_epochs=200,
+    train_dataset=train_dataset,
+    train_batch_size=8,
+    eval_dataset=eval_dataset,
+    pretrain_weights='COCO',
+    learning_rate=0.005 / 12,
+    warmup_steps=500,
+    warmup_start_lr=0.0,
+    save_interval_epochs=5,
+    lr_decay_epochs=[85, 135],
+    save_dir='output/ppyolo_r50vd_dcn',
+    use_vdl=True)
+```
+
 ## <h2 id="4">paddlex.datasets.SegDataset</h2>
 > **用于语义分割模型**  
 ```python