
Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleX into develop_kong

FlyingQianMM 5 years ago
commit 6925722e1d
72 changed files with 2148 additions and 442 deletions
  1. +2 -0  .pre-commit-config.yaml
  2. +2 -6  deploy/cpp/include/paddlex/transforms.h
  3. +1 -1  deploy/cpp/src/paddlex.cpp
  4. +6 -0  docs/FAQ.md
  5. +3 -1  docs/apis/models/classification.md
  6. +61 -0  docs/apis/transforms/cls_transforms.md
  7. +130 -0  docs/apis/transforms/det_transforms.md
  8. +60 -0  docs/apis/transforms/seg_transforms.md
  9. +32 -0  docs/appendix/parameters.md
  10. BIN  docs/images/._文件(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png
  11. BIN  docs/images/文件(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png
  12. +28 -1  docs/paddlex_gui/download.md
  13. +148 -1  docs/paddlex_gui/how_to_use.md
  14. BIN  docs/paddlex_gui/images/QR.jpg
  15. +1 -0  docs/paddlex_gui/images/ReadMe
  16. BIN  docs/paddlex_gui/images/dataset2.jpg
  17. BIN  docs/paddlex_gui/images/datasets1.jpg
  18. BIN  docs/paddlex_gui/images/project1.jpg
  19. BIN  docs/paddlex_gui/images/project2.jpg
  20. BIN  docs/paddlex_gui/images/project3.jpg
  21. BIN  docs/paddlex_gui/images/publish.jpg
  22. BIN  docs/paddlex_gui/images/visualization1.jpg
  23. BIN  docs/paddlex_gui/images/visualization2.jpg
  24. BIN  docs/paddlex_gui/images/visualization3.jpg
  25. BIN  docs/paddlex_gui/images/visualization4.jpg
  26. BIN  docs/paddlex_gui/images/visualization5.jpg
  27. +1 -1  docs/paddlex_gui/index.rst
  28. +8 -0  docs/slim/index.rst
  29. +54 -0  docs/slim/prune.md
  30. +11 -0  docs/slim/quant.md
  31. +15 -3  docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md
  32. +12 -3  docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md
  33. +18 -0  new_tutorials/train/README.md
  34. +47 -0  new_tutorials/train/classification/mobilenetv2.py
  35. +56 -0  new_tutorials/train/classification/resnet50.py
  36. +49 -0  new_tutorials/train/detection/faster_rcnn_r50_fpn.py
  37. +48 -0  new_tutorials/train/detection/mask_rcnn_r50_fpn.py
  38. +48 -0  new_tutorials/train/detection/yolov3_darknet53.py
  39. +51 -0  new_tutorials/train/segmentation/deeplabv3p.py
  40. +47 -0  new_tutorials/train/segmentation/hrnet.py
  41. +47 -0  new_tutorials/train/segmentation/unet.py
  42. +1 -1  paddlex/__init__.py
  43. +1 -0  paddlex/cls.py
  44. +1 -0  paddlex/cv/models/__init__.py
  45. +9 -13  paddlex/cv/models/base.py
  46. +41 -9  paddlex/cv/models/classifier.py
  47. +4 -2  paddlex/cv/models/deeplabv3p.py
  48. +16 -10  paddlex/cv/models/faster_rcnn.py
  49. +11 -2  paddlex/cv/models/load_model.py
  50. +16 -10  paddlex/cv/models/mask_rcnn.py
  51. +38 -8  paddlex/cv/models/slim/prune.py
  52. +5 -2  paddlex/cv/models/slim/prune_config.py
  53. +8 -4  paddlex/cv/models/utils/pretrain_weights.py
  54. +15 -8  paddlex/cv/models/yolo_v3.py
  55. +6 -0  paddlex/cv/nets/__init__.py
  56. +170 -0  paddlex/cv/nets/alexnet.py
  57. +2 -2  paddlex/cv/nets/hrnet.py
  58. +6 -1  paddlex/cv/transforms/cls_transforms.py
  59. +8 -3  paddlex/cv/transforms/det_transforms.py
  60. +7 -2  paddlex/cv/transforms/seg_transforms.py
  61. +24 -8  paddlex/interpret/core/_session_preparation.py
  62. +7 -11  paddlex/interpret/core/interpretation.py
  63. +316 -106  paddlex/interpret/core/interpretation_algorithms.py
  64. +104 -68  paddlex/interpret/core/lime_base.py
  65. +219 -38  paddlex/interpret/core/normlime_base.py
  66. +18 -9  paddlex/interpret/interpretation_predict.py
  67. +85 -88  paddlex/interpret/visualize.py
  68. +1 -1  paddlex/slim.py
  69. +2 -2  paddlex/tools/x2coco.py
  70. +8 -8  paddlex/utils/logging.py
  71. +1 -1  setup.py
  72. +12 -8  tutorials/interpret/normlime.py

+ 2 - 0
.pre-commit-config.yaml

@@ -23,6 +23,7 @@
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
         exclude: (?!.*third_party)^.*$
 
+-   repo: local
     hooks:
     -   id: clang-format-with-version-check
         name: clang-format
@@ -31,6 +32,7 @@
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
 
+-   repo: local
     hooks:
     -   id: cpplint-cpp-source
         name: cpplint

+ 2 - 6
deploy/cpp/include/paddlex/transforms.h

@@ -83,7 +83,7 @@ class ResizeByShort : public Transform {
     } else {
       max_size_ = -1;
     }
-  };
+  }
   virtual bool Run(cv::Mat* im, ImageBlob* data);
 
  private:
@@ -96,7 +96,7 @@ class ResizeByLong : public Transform {
  public:
   virtual void Init(const YAML::Node& item) {
     long_size_ = item["long_size"].as<int>();
-  };
+  }
   virtual bool Run(cv::Mat* im, ImageBlob* data);
 
  private:
@@ -167,9 +167,6 @@ class Padding : public Transform {
         height_ = item["target_size"].as<std::vector<int>>()[1];
       }
     }
-    if (item["im_padding_value"].IsDefined()) {
-      value_ = item["im_padding_value"].as<std::vector<float>>();
-    }
   }
   virtual bool Run(cv::Mat* im, ImageBlob* data);
 
@@ -177,7 +174,6 @@ class Padding : public Transform {
   int coarsest_stride_ = -1;
   int width_ = 0;
   int height_ = 0;
-  std::vector<float> value_;
 };
 
 class Transforms {

+ 1 - 1
deploy/cpp/src/paddlex.cpp

@@ -98,7 +98,7 @@ bool Model::load_config(const std::string& model_dir) {
 
 bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
   cv::Mat im = input_im.clone();
-  if (!transforms_.Run(&im, &inputs_)) {
+  if (!transforms_.Run(&im, blob)) {
     return false;
   }
   return true;

+ 6 - 0
docs/FAQ.md

@@ -60,3 +60,9 @@
 ## 11. Every time I train a new model the pretrained weights are downloaded again. How can I download them just once?
 > 1. You can solve this the same way as in question 9.  
 > 2. Set the `paddlex.pretrain_dir` path before each training run, e.g. `paddlex.pretrain_dir='/usrname/paddlex'` (see the sketch below). Downloaded pretrained models will then be stored under the `/usrname/paddlex` directory, and models already present there will not be downloaded again.
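+
+A minimal sketch of option 2 in a training script (the directory follows the example above; any writable path works):
+
+```python
+import paddlex
+
+# set this before building/training the model; downloads are cached here from now on
+paddlex.pretrain_dir = '/usrname/paddlex'
+```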
+
+## 12. The program fails to start with "Failed to execute script PaddleX". How do I fix it?
+> 1. Check whether the path of the PaddleX program on the target machine contains Chinese characters. Chinese paths are not supported yet; try moving the program to a directory with an ASCII-only path.
+> 2. On Windows 7 or Windows Server 2012, the cause is missing DLLs that OpenCV depends on, such as MFPlat.DLL/MF.dll/MFReadWrite.dll. Install the Desktop Experience feature as follows: open Server Manager via "My Computer" --> "Properties" --> "Manage", click "Manage" in the upper-right corner and choose "Add Roles and Features". Click "Server Selection" --> "Features", scroll to the bottom, expand "User Interfaces and Infrastructure", tick "Desktop Experience" and click "Install". Once installation finishes, try running PaddleX again.
+> 3. Check whether another PaddleX program or process is already running on the target machine; if so, quit it or reboot the machine and see whether the problem is resolved.
+> 4. Make sure the user running the program has administrator privileges; if not, try running it as an administrator.

+ 3 - 1
docs/apis/models/classification.md

@@ -15,7 +15,7 @@ paddlex.cls.ResNet50(num_classes=1000)
 ### train (training interface)
 
 ```python
-train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
+train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, warmup_steps=0, warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
 ```
 >
 > **Parameters**
@@ -30,6 +30,8 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s
 > > - **pretrain_weights** (str): If a path is given, pretrained weights are loaded from it; if the string 'IMAGENET', weights pretrained on ImageNet images are downloaded automatically; if None, no pretrained weights are used. Defaults to 'IMAGENET'.
 > > - **optimizer** (paddle.fluid.optimizer): Optimizer. When None, the default optimizer is used: a fluid.layers.piecewise_decay schedule with the fluid.optimizer.Momentum method.
 > > - **learning_rate** (float): Initial learning rate of the default optimizer. Defaults to 0.025.
+> > - **warmup_steps** (int): Warmup steps of the default optimizer; the learning rate grows linearly from warmup_start_lr to the configured learning_rate within this many steps. Defaults to 0.
+> > - **warmup_start_lr** (float): Starting learning rate of the default optimizer's warmup. Defaults to 0.0.
 > > - **lr_decay_epochs** (list): Epochs at which the default optimizer's learning rate decays. Defaults to [30, 60, 90].
 > > - **lr_decay_gamma** (float): Decay factor of the default optimizer's learning rate. Defaults to 0.1.
 > > - **use_vdl** (bool): Whether to use VisualDL for visualization. Defaults to False.
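
A minimal sketch of a `train` call using the two warmup parameters added here (the model and dataset objects are assumed to be built as in the PaddleX tutorials; the numbers are illustrative only):

```python
model = paddlex.cls.ResNet50(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=10,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    learning_rate=0.025,
    warmup_steps=100,       # LR rises linearly from warmup_start_lr over the first 100 steps
    warmup_start_lr=0.001,  # starting LR of the warmup phase
    lr_decay_epochs=[30, 60, 90])
```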

+ 61 - 0
docs/apis/transforms/cls_transforms.md

@@ -122,3 +122,64 @@ paddlex.cls.transforms.RandomDistort(brightness_range=0.9, brightness_prob=0.5,
 * **saturation_prob** (float): Probability of randomly adjusting saturation. Defaults to 0.5.
 * **hue_range** (int): Range of the hue factor. Defaults to 18.
 * **hue_prob** (float): Probability of randomly adjusting hue. Defaults to 0.5.
+
+## The ComposedClsTransforms class
+```python
+paddlex.cls.transforms.ComposedClsTransforms(mode, crop_size=[224, 224], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+```
+A pre-composed data-processing pipeline for classification models. Developers can use ComposedClsTransforms directly instead of composing transforms by hand. The class already contains the [RandomCrop](#RandomCrop) and [RandomHorizontalFlip](#RandomHorizontalFlip) augmentations; additional augmentations can still be added through the [add_augmenters interface](#add_augmenters).  
+ComposedClsTransforms consists of the following steps:
+> Training:
+> > 1. Randomly crop a sub-image from the input and resize it to crop_size
+> > 2. Randomly flip the output of step 1 horizontally with probability 0.5
+> > 3. Normalize the image
+> Evaluation/prediction:
+> > 1. Resize the image proportionally so that its shorter side equals crop_size[0] * 1.14
+> > 2. Crop an image of size crop_size from the center
+> > 3. Normalize the image
+
+### Parameters
+* **mode** (str): Stage the transforms are used in; one of 'train', 'eval' or 'test'
+* **crop_size** (int|list): Size of the image fed to the model; defaults to [224, 224] (independent of the original image size: the steps above turn the original image into this size for training)
+* **mean** (list): Image mean. Defaults to [0.485, 0.456, 0.406].
+* **std** (list): Image standard deviation. Defaults to [0.229, 0.224, 0.225].
+
+### Adding augmentations
+```python
+ComposedClsTransforms.add_augmenters(augmenters)
+```
+> **Parameters**
+> * **augmenters** (list): List of augmentation operators
+
+#### Example
+```python
+import paddlex as pdx
+from paddlex.cls import transforms
+train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[320, 320])
+eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[320, 320])
+
+# add extra augmentations
+import imgaug.augmenters as iaa
+train_transforms.add_augmenters([
+    transforms.RandomDistort(),
+    iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
+])
+```
+The code above is equivalent to:
+```python
+import paddlex as pdx
+from paddlex.cls import transforms
+import imgaug.augmenters as iaa
+train_transforms = transforms.Compose([
+    transforms.RandomDistort(),
+    iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
+    # the two ops above were added via add_augmenters
+    transforms.RandomCrop(crop_size=320),
+    transforms.RandomHorizontalFlip(prob=0.5),
+    transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+    transforms.ResizeByShort(short_size=int(320*1.14)),
+    transforms.CenterCrop(crop_size=320),
+    transforms.Normalize()
+])
+```

+ 130 - 0
docs/apis/transforms/det_transforms.md

@@ -167,3 +167,133 @@ paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3,
 * **num_attempts** (int): Number of attempts before giving up on finding a valid crop region. Defaults to 50.
 * **allow_no_crop** (bool): Whether skipping the crop entirely is allowed. Defaults to True.
 * **cover_all_box** (bool): Whether every ground-truth box must lie inside the crop region. Defaults to False.
+
+## The ComposedRCNNTransforms class
+```python
+paddlex.det.transforms.ComposedRCNNTransforms(mode, min_max_size=[800, 1333], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+```
+A pre-composed data-processing pipeline for the FasterRCNN object-detection and MaskRCNN instance-segmentation models. Developers can use ComposedRCNNTransforms directly instead of composing transforms by hand. The class already contains the [RandomHorizontalFlip](#RandomHorizontalFlip) augmentation; additional augmentations can still be added through the [add_augmenters interface](#add_augmenters).  
+ComposedRCNNTransforms consists of the following steps:
+> Training:
+> > 1. Flip the image horizontally with probability 0.5
+> > 2. Normalize the image
+> > 3. Resize the image with [ResizeByShort](#ResizeByShort) according to the min_max_size parameter
+> > 4. Pad the image height and width to multiples of 32 with [Padding](#Padding)
+> Evaluation/prediction:
+> > 1. Normalize the image
+> > 2. Resize the image with [ResizeByShort](#ResizeByShort) according to the min_max_size parameter
+> > 3. Pad the image height and width to multiples of 32 with [Padding](#Padding)
+
+### Parameters
+* **mode** (str): Stage the transforms are used in; one of 'train', 'eval' or 'test'
+* **min_max_size** (list): Shortest-side and longest-side lengths of the image fed into the model; see [ResizeByShort](#ResizeByShort) (independent of the original image size: the steps above resize the original image accordingly for training). Defaults to [800, 1333]
+* **mean** (list): Image mean. Defaults to [0.485, 0.456, 0.406].
+* **std** (list): Image standard deviation. Defaults to [0.229, 0.224, 0.225].
+
+### Adding augmentations
+```python
+ComposedRCNNTransforms.add_augmenters(augmenters)
+```
+> **Parameters**
+> * **augmenters** (list): List of augmentation operators
+
+#### Example
+```python
+import paddlex as pdx
+from paddlex.det import transforms
+train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
+eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
+
+# add extra augmentations
+import imgaug.augmenters as iaa
+train_transforms.add_augmenters([
+    transforms.RandomDistort(),
+    iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
+])
+```
+The code above is equivalent to:
+```python
+import paddlex as pdx
+from paddlex.det import transforms
+import imgaug.augmenters as iaa
+train_transforms = transforms.Compose([
+    transforms.RandomDistort(),
+    iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
+    # the two ops above were added via add_augmenters
+    transforms.RandomHorizontalFlip(prob=0.5),
+    transforms.Normalize(),
+    transforms.ResizeByShort(short_size=800, max_size=1333),
+    transforms.Padding(coarsest_stride=32)
+])
+eval_transforms = transforms.Compose([
+    transforms.Normalize(),
+    transforms.ResizeByShort(short_size=800, max_size=1333),
+    transforms.Padding(coarsest_stride=32)
+])
+```
+
+
+## The ComposedYOLOv3Transforms class
+```python
+paddlex.det.transforms.ComposedYOLOv3Transforms(mode, shape=[608, 608], mixup_epoch=250, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+```
+A pre-composed data-processing pipeline for the YOLOv3 object-detection model. Developers can use ComposedYOLOv3Transforms directly instead of composing transforms by hand. The class already contains five augmentations: [MixupImage](#MixupImage), [RandomDistort](#RandomDistort), [RandomExpand](#RandomExpand), [RandomCrop](#RandomCrop) and [RandomHorizontalFlip](#RandomHorizontalFlip); additional augmentations can still be added through the [add_augmenters interface](#add_augmenters).  
+ComposedYOLOv3Transforms consists of the following steps:
+> Training:
+> > 1. Apply the MixupImage strategy during the first mixup_epoch epochs
+> > 2. Randomly distort the image's brightness, contrast, saturation and hue
+> > 3. Randomly expand the image
+> > 4. Randomly crop the image
+> > 5. Resize the output of step 4 to the size given by the shape parameter
+> > 6. Flip the image horizontally with probability 0.5
+> > 7. Normalize the image
+> Evaluation/prediction:
+> > 1. Resize the image to the size given by the shape parameter
+> > 2. Normalize the image
+
+### Parameters
+* **mode** (str): Stage the transforms are used in; one of 'train', 'eval' or 'test'
+* **shape** (list): Size of the image fed into the model (independent of the original image size: the steps above resize the original image accordingly for training). Defaults to [608, 608]
+* **mixup_epoch** (int): Mixup is applied during the first mixup_epoch epochs of training; -1 disables mixup entirely (see the sketch after this list). Defaults to 250.
+* **mean** (list): Image mean. Defaults to [0.485, 0.456, 0.406].
+* **std** (list): Image standard deviation. Defaults to [0.229, 0.224, 0.225].
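+
+For example, mixup can be switched off entirely (a one-line sketch based on the mixup_epoch parameter described above):
+
+```python
+train_transforms = transforms.ComposedYOLOv3Transforms(
+    mode='train', shape=[608, 608], mixup_epoch=-1)  # -1 disables mixup
+```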
+
+### Adding augmentations
+```python
+ComposedYOLOv3Transforms.add_augmenters(augmenters)
+```
+> **Parameters**
+> * **augmenters** (list): List of augmentation operators
+
+#### Example
+```python
+import paddlex as pdx
+from paddlex.det import transforms
+train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[480, 480])
+eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[480, 480])
+
+# add extra augmentations
+import imgaug.augmenters as iaa
+train_transforms.add_augmenters([
+    iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
+])
+```
+The code above is equivalent to:
+```python
+import paddlex as pdx
+from paddlex.det import transforms
+import imgaug.augmenters as iaa
+train_transforms = transforms.Compose([
+    iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
+    # the op above was added via add_augmenters
+    transforms.MixupImage(mixup_epoch=250),
+    transforms.RandomDistort(),
+    transforms.RandomExpand(),
+    transforms.RandomCrop(),
+    transforms.Resize(target_size=480, interp='RANDOM'),
+    transforms.RandomHorizontalFlip(prob=0.5),
+    transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+    transforms.Resize(target_size=480, interp='CUBIC'),
+    transforms.Normalize()
+])
+```

+ 60 - 0
docs/apis/transforms/seg_transforms.md

@@ -166,3 +166,63 @@ paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5,
 * **saturation_prob** (float): Probability of randomly adjusting saturation. Defaults to 0.5.
 * **hue_range** (int): Range of the hue factor. Defaults to 18.
 * **hue_prob** (float): Probability of randomly adjusting hue. Defaults to 0.5.
+
+## The ComposedSegTransforms class
+```python
+paddlex.seg.transforms.ComposedSegTransforms(mode, train_crop_size=[769, 769], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+```
+A pre-composed data-processing pipeline for the DeepLab and UNet semantic-segmentation models. Developers can use ComposedSegTransforms directly instead of composing transforms by hand. The class already contains three augmentations: [RandomHorizontalFlip](#RandomHorizontalFlip), [ResizeStepScaling](#ResizeStepScaling) and [RandomPaddingCrop](#RandomPaddingCrop); additional augmentations can still be added through the [add_augmenters interface](#add_augmenters).  
+ComposedSegTransforms consists of the following steps:
+> Training:
+> > 1. Flip the image horizontally with probability 0.5
+> > 2. Randomly resize the original image by varying ratios
+> > 3. Randomly crop a sub-image of size train_crop_size from the image; if the crop comes out smaller than train_crop_size, pad it to that size
+> > 4. Normalize the image
+> Prediction:
+> > 1. Normalize the image
+
+
+### Parameters
+* **mode** (str): Stage the transforms are used in; one of 'train', 'eval' or 'test'
+* **train_crop_size** (list): Size of the image fed into the model after the random crop and resize during training (not needed for evaluation or prediction, where the original image size is used automatically; independent of the original image size, since the steps above resize the original image accordingly). Defaults to [769, 769]
+* **mean** (list): Image mean. Defaults to [0.485, 0.456, 0.406].
+* **std** (list): Image standard deviation. Defaults to [0.229, 0.224, 0.225].
+
+### Adding augmentations
+```python
+ComposedSegTransforms.add_augmenters(augmenters)
+```
+> **Parameters**
+> * **augmenters** (list): List of augmentation operators
+
+#### Example
+```python
+import paddlex as pdx
+from paddlex.seg import transforms
+train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[512, 512])
+eval_transforms = transforms.ComposedSegTransforms(mode='eval')
+
+# add extra augmentations
+import imgaug.augmenters as iaa
+train_transforms.add_augmenters([
+    transforms.RandomDistort(),
+    iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
+])
+```
+The code above is equivalent to:
+```python
+import paddlex as pdx
+from paddlex.seg import transforms
+import imgaug.augmenters as iaa
+train_transforms = transforms.Compose([
+    transforms.RandomDistort(),
+    iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
+    # the two ops above were added via add_augmenters
+    transforms.RandomHorizontalFlip(prob=0.5),
+    transforms.ResizeStepScaling(),
+    transforms.RandomPaddingCrop(crop_size=[512, 512]),
+    transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+    transforms.Normalize()
+])
+```

+ 32 - 0
docs/appendix/parameters.md

@@ -23,3 +23,35 @@ Batch Size is the number of samples the model processes at once during training; if
 - [Instance segmentation MaskRCNN-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#train)
 - [Semantic segmentation DeepLabv3p-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#train)
 - [Semantic segmentation UNet](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#id2)
+
+## Notes on lr_decay_epochs, warmup_steps and related parameters
+
+When training models with PaddleX (or other deep-learning frameworks) you will often come across parameters such as lr_decay_epochs, warmup_steps and warmup_start_lr. This section explains what they do.  
+
+All of these parameters control how the learning rate changes during training. For example, if learning_rate is set to 0.1, training would normally proceed with the learning rate fixed at 0.1 throughout; but to get better results we usually do not want it to stay constant.
+
+### warmup_steps and warmup_start_lr
+
+Training usually starts from pretrained weights, e.g. a detection model whose backbone carries ImageNet-pretrained weights. Because your own data can differ substantially from ImageNet, overly large gradients at the very start of training can cause problems, so it helps to begin with a small learning rate that grows gradually to the configured value. That is exactly what `warmup_steps` and `warmup_start_lr` do: training starts at `warmup_start_lr`, and the learning rate grows linearly to the configured learning rate within `warmup_steps` steps.
+
+### lr_decay_epochs and lr_decay_gamma
+
+`lr_decay_epochs` makes the learning rate decay step-wise in the later stages of training. It is usually a list, e.g. [6, 8, 10], meaning the learning rate decays once at epoch 6, again at epoch 8, and once more at epoch 10. Each decay multiplies the current learning rate by lr_decay_gamma.
+
+### Notice
+
+PaddleX requires warmup to finish before the first learning-rate decay, so the following condition must hold:
+```
+warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch
+```
+where `num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size`.  
+
+> So if PaddleX reports `warmup_steps should be less than xxx` during training, adjust `lr_decay_epochs` or `warmup_steps` according to the formula above so that the two parameters satisfy the condition.
+
+> - Image classification models: [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#train)
+> - FasterRCNN: [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn)
+> - YOLOv3: [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3)
+> - MaskRCNN: [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn)
+> - DeepLab: [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p)
+> - UNet: [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet)
+> - HRNet: [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet)

BIN
docs/images/._文件(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png


BIN
docs/images/文件(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png


+ 28 - 1
docs/paddlex_gui/download.md

@@ -1 +1,28 @@
-# PaddleX GUI Download and Installation
+## <a name="2">Installing the PaddleX GUI</a>
+
+ The PaddleX GUI is the core module for raising project-development efficiency: with it, developers can complete the full deep-learning model workflow quickly. We sincerely invite you to download and try the PaddleX GUI visual front end from the [official site](https://www.paddlepaddle.org.cn/paddle/paddleX) and to share your valuable feedback or contribute to the open-source project.
+
+
+
+#### <a name="1">Recommended environment</a>
+
+* **Operating system**:
+  * Windows 7/8/10 (Windows 10 recommended);
+  * Mac OS 10.13+;
+  * Ubuntu 18.04+;
+
+***Note: the processor must be x86_64 and support MKL.***
+
+* **Training hardware**:  
+  * **GPU** (Windows and Linux only):  
+    An NVIDIA GPU with CUDA support is recommended, e.g. a GTX 1070 or better;  
+    Windows x86_64 driver version >= 411.31;  
+    Linux x86_64 driver version >= 410.48;  
+    8 GB or more of GPU memory;  
+  * **CPU**:  
+    PaddleX currently supports training on your local CPU, but a GPU is recommended for a better development experience.
+  * **Memory**: 8 GB or more recommended  
+  * **Disk space**: 1 TB or more of free SSD space recommended (not required)  
+
+***Note: PaddleX supports only single-GPU training on Windows and Mac OS. NCCL is not supported on Windows.***
+

+ 148 - 1
docs/paddlex_gui/how_to_use.md

@@ -1 +1,148 @@
-# How to Train a Model with the PaddleX GUI
+# PaddleX GUI User Guide
+
+PaddleX is PaddlePaddle's full-workflow development tool: it combines the core framework, model libraries, tools and components needed across the deep-learning development workflow; it is easy to use and easy to integrate, and it is the best companion for getting started with deep learning quickly and for raising project-development efficiency.
+
+The PaddleX GUI is a graphical client product built on PaddleX. It lets developers complete the full model-development workflow with form-style inputs, which can greatly improve project-development efficiency. The PaddlePaddle team looks forward to the products developers will build on PaddleX to meet their own real industrial needs.
+
+We sincerely invite you to download and try the PaddleX GUI from the [official site](https://www.paddlepaddle.org.cn/paddlex) and to share your valuable feedback or contribute to the open-source project.
+
+
+
+
+## Contents
+
+* **Product features**
+* **The PaddleX GUI visual front end**
+* **FAQ**
+
+
+
+## Product features
+
+- **Full workflow coverage**
+
+Connects every stage of deep-learning development behind a visual interface, removing the need to learn each stage's APIs or to write repetitive code, which greatly improves development efficiency.
+
+- **Easy to use and integrate**
+
+Provides the most complete and flexible Python API development mode, fully open source and easy to integrate and extend. The form-based graphical interface lets non-specialists run a business proof of concept quickly.
+
+- **Industrial practice built in**
+
+Distills PaddlePaddle's experience from industrial deployments into a curated set of high-quality vision-model solutions, with hands-on case tutorials that walk you through meeting real industrial needs.
+
+- **Tutorials and support**
+
+Documentation and technical support for the whole development workflow, from dataset preparation to deployment. Developers can reach the PaddlePaddle team and industry partners through QQ groups, WeChat groups, the GitHub community and more.
+
+
+
+## The PaddleX GUI visual front end
+
+**Step 1: Prepare the data**
+
+Before training a model, annotate your data in the format required by the task type. PaddleX currently supports four task types: image classification, object detection, semantic segmentation and instance segmentation. See [data annotation](https://paddlex.readthedocs.io/zh_CN/latest/appendix/datasets.html) for how data is prepared for each task type.
+
+
+
+**Step 2: Import your dataset**
+
+① After annotation, rename the data and annotation files as prompted by the client, according to the task, and save them into the correct folders.
+
+② Create a new dataset in the client, choose the task type that matches the dataset, select the dataset's path, and import it.
+
+![](images/datasets1.jpg)
+
+③ Once a dataset is selected for import, the client automatically checks that the data and annotation files are well formed. After the check succeeds, you can split the dataset into training, validation and test sets in whatever proportions you need.
+
+④ In the "Data analysis" module you can preview the annotated dataset; double-click a single image to zoom in.
+
+![](images/dataset2.jpg)
+
+**Step 3: Create a project**
+
+① After importing data, click "New project" to create one.
+
+② Choose the project's task type according to your needs; note that datasets also carry a task-type attribute, and the two must match.
+
+![](images/project3.jpg)
+
+
+
+**Step 4: Project development**
+
+① **Data selection**: After the project is created, select a dataset that has been loaded into the client and validated, then click Next to reach the parameter-configuration page.
+
+![](images/project1.jpg)
+
+② **Parameter configuration**: split into **model parameters**, **training parameters** and **optimization strategy**. Choose the model architecture, backbone and the matching training parameters and optimization strategy to get the best results on your task.
+
+![](images/project2.jpg)
+
+Once the parameters are configured, click Start training; the model trains and its quality is evaluated.
+
+③ **Training visualization**: During training you can inspect the evolution of training parameters and detailed logs through VisualDL, together with the current best metrics on the training and validation sets. Training can be stopped at any time by clicking "Stop training".
+
+![](images/visualization1.jpg)
+
+When training finishes, you can proceed to "Pruning analysis" or go straight to "Model evaluation".
+
+![](images/visualization2.jpg)
+
+④ **Model pruning**: If you want to shrink the model, reduce its computation and improve its inference performance on devices, you can use the pruning strategies PaddleX provides. Pruning analyzes the sensitivity of each convolutional layer, prunes parameters by ratios that reflect their impact on model quality, then fine-tunes to obtain the final pruned model.
+
+![](images/visualization3.jpg)
+
+⑤ **Model evaluation**: On the evaluation page you can inspect the trained model's quality, using the confusion matrix, precision, recall and other metrics.
+
+![](images/visualization4.jpg)
+
+You can also test the trained model on the test set held out during dataset splitting, or on one or more images imported from a local folder. Based on the test results you can decide whether to save the trained model as a pretrained model and move on to the publishing page, or to go back, adjust the parameter configuration and retrain.
+
+![](images/visualization5.jpg)
+
+
+
+**Step 5: Publish the model**
+
+When you are satisfied with the model, publish it in whatever form your production environment needs.
+
+![](images/publish.jpg)
+
+
+
+## FAQ
+
+1. **Why is training so slow?**
+
+   PaddleX runs entirely on your local hardware, and deep learning genuinely demands a lot of compute. We support CPUs so that you can try PaddleX quickly, but we strongly recommend a GPU for faster training and a better development experience.
+
+
+
+2. **Can I deploy PaddleX on a server or a cloud platform?**
+
+   The PaddleX GUI is a client designed for single-machine local installation and cannot be deployed directly on a server. You can use the PaddleX API directly, or deploy on a server with the PaddlePaddle core framework. If you would like to use public compute, we strongly recommend trying [EasyDL](https://ai.baidu.com/easydl/) or [AI Studio](https://aistudio.baidu.com/aistudio/index) from the PaddlePaddle product family.
+
+
+
+3. **Does PaddleX support data annotated with EasyData?**
+
+   Yes, PaddleX reads EasyData-annotated data smoothly. However, the current version of the PaddleX GUI cannot import the EasyData format directly; follow the documentation to [convert the dataset](https://paddlex.readthedocs.io/zh_CN/latest/appendix/how_to_convert_dataset.html) before importing it into the PaddleX GUI.
+   We are also actively working on letting the PaddleX GUI import the EasyData format directly.
+
+
+
+4. **Why does pruning analysis take so long?**
+
+   Pruning analysis measures the sensitivity of every convolutional layer in the model and prunes each parameter by a ratio that reflects its impact on model quality. The process repeats until the FLOPS target is met, and it finishes with fine-tuning to obtain the final pruned model, hence the long runtime. For the theory behind pruning, see [the pruning introduction](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
+
+
+
+5. **How do I call the back-end code?**
+
+   The PaddleX team has prepared API reference documentation for you to study and use; see the [PaddleX API documentation](https://paddlex.readthedocs.io/zh_CN/latest/apis/index.html)
+
+
+
+**If you have more questions or suggestions, feel free to open an issue, or join the official PaddleX QQ group (1045148026) to report your problems and needs directly**
+
+![](images/QR.jpg)

BIN
docs/paddlex_gui/images/QR.jpg


+ 1 - 0
docs/paddlex_gui/images/ReadMe

@@ -0,0 +1 @@
+PaddleX GUI Screen Shot

BIN
docs/paddlex_gui/images/dataset2.jpg


BIN
docs/paddlex_gui/images/datasets1.jpg


BIN
docs/paddlex_gui/images/project1.jpg


BIN
docs/paddlex_gui/images/project2.jpg


BIN
docs/paddlex_gui/images/project3.jpg


BIN
docs/paddlex_gui/images/publish.jpg


BIN
docs/paddlex_gui/images/visualization1.jpg


BIN
docs/paddlex_gui/images/visualization2.jpg


BIN
docs/paddlex_gui/images/visualization3.jpg


BIN
docs/paddlex_gui/images/visualization4.jpg


BIN
docs/paddlex_gui/images/visualization5.jpg


+ 1 - 1
docs/paddlex_gui/index.rst

@@ -21,7 +21,7 @@ PaddleX GUI is a visual model-training suite built on PaddleX, which can
    how_to_use.md
    xx.md
 
-* PaddleX version: v0.1.7
+* PaddleX GUI version: v1.0
 * Official site: http://www.paddlepaddle.org.cn/paddle/paddlex  
 * Project GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop  
 * Official QQ user group: 1045148026  

+ 8 - 0
docs/slim/index.rst

@@ -0,0 +1,8 @@
+Model Compression
+============================
+
+.. toctree::
+   :maxdepth: 2
+
+   prune.md
+   quant.md

+ 54 - 0
docs/slim/prune.md

@@ -0,0 +1,54 @@
+# Model Pruning
+
+## How it works
+
+Model pruning reduces a model's computation and size and speeds up inference after deployment; it is a common way to shrink models and lower their computational complexity. It works by pruning the number of output channels of convolution kernels together with the parameters of the associated layers; the theory behind correlated pruning is described in the [PaddleSlim documentation](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16). **In general, at the same model accuracy, the lower the complexity of the data, the larger the fraction of the model that can be pruned.**
+
+## Pruning methods
+PaddleX offers two options:
+
+**1. Compute the pruning configuration yourself (recommended). The workflow has three steps:**  
+> **Step 1**: Train the original model on your dataset  
+> **Step 2**: Using the model trained in step 1, compute the sensitivity of each model parameter on the validation set and save the sensitivity information to a local file  
+> **Step 3**: Train the pruned model on your dataset (the only difference from step 1 is that the sensitivity file from step 2 is passed to the `sensitivities_file` parameter of the `train` interface)  
+
+> Across these three steps, **the model is effectively trained twice** (steps 1 and 3); step 3, however, trains the pruned model, so it runs faster than step 1.  
+> Step 2 iterates over part of the model's prunable parameters and measures, one by one, how pruning each affects the model's accuracy on the validation set, **so it evaluates the validation set many times over**.  
+
+**2. Use PaddleX's built-in pruning scheme**  
+> The built-in scheme relies on parameter sensitivities computed **on standard datasets**. Because feature distributions vary considerably between datasets, a model trained this way is **generally less accurate** than one trained with option 1 (**and the larger the gap between your dataset's feature distribution and the standard dataset's, the lower the resulting accuracy**), so use it only if you want to save time. It takes a single step:  
+
+> **One step**: Train the pruned model on your dataset, setting the `sensitivities_file` parameter of the `train` interface to the string 'DEFAULT'
+
+> Note: the built-in schemes are based on the following datasets: image classification on ImageNet, object detection on PascalVOC, and semantic segmentation on Cityscapes
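+
+A minimal sketch of how `sensitivities_file` is passed in each case (the model and dataset objects are assumed to be set up as in the tutorials; the file path in option 1 is a placeholder for the file produced in step 2):
+
+```python
+# Option 1, step 3: train the pruned model with your own sensitivity file
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    sensitivities_file='./sensitivities.data',  # placeholder path from step 2
+    eval_metric_loss=0.05,
+    save_dir='output/pruned')
+
+# Option 2: use the built-in sensitivities computed on the standard dataset
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    sensitivities_file='DEFAULT',
+    eval_metric_loss=0.05,
+    save_dir='output/pruned')
+```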
+
+## Pruning experiments
+Using the two options above, we ran experiments in PaddleX on the sample datasets. The metrics below were measured on a Tesla P40.
+
+### Image classification
+Setup: MobileNetV2 on the vegetable-classification sample dataset; see [Tutorials - Model Compression - Image Classification](../tutorials/compress/classification.md)
+
+| Model | Pruning | Model size | Top-1 accuracy (%) | GPU inference time | CPU inference time |
+| :-----| :--------| :-------- | :---------- |:---------- |:----------|
+|MobileNetV2 | none (original model) | 13.0M | 97.50 |6.47ms |47.44ms |
+|MobileNetV2 | option 1 (eval_metric_loss=0.10) | 2.1M | 99.58 |5.03ms |20.22ms |
+|MobileNetV2 | option 2 (eval_metric_loss=0.10) | 6.0M | 99.58 |5.42ms |29.06ms |
+
+### Object detection
+Setup: YOLOv3-MobileNetV1 on the insect-detection sample dataset; see [Tutorials - Model Compression - Object Detection](../tutorials/compress/detection.md)
+
+
+| Model | Pruning | Model size | mAP (%) | GPU inference time | CPU inference time |
+| :-----| :--------| :-------- | :---------- |:---------- | :---------|
+|YOLOv3-MobileNetV1 | none (original model) | 139M | 67.57| 14.88ms |976.42ms |
+|YOLOv3-MobileNetV1 | option 1 (eval_metric_loss=0.10) | 34M | 75.49 |10.60ms |558.49ms |
+|YOLOv3-MobileNetV1 | option 2 (eval_metric_loss=0.05) | 29M | 50.27| 9.43ms |360.46ms |
+
+### Semantic segmentation
+Setup: UNet on the optic-disc-segmentation sample dataset; see [Tutorials - Model Compression - Semantic Segmentation](../tutorials/compress/segmentation.md)
+
+| Model | Pruning | Model size | mIoU (%) | GPU inference time | CPU inference time |
+| :-----| :--------| :-------- | :---------- |:---------- | :---------|
+|UNet | none (original model) | 77M | 91.22 |33.28ms |9523.55ms |
+|UNet | option 1 (eval_metric_loss=0.10) |26M | 90.37 |21.04ms |3936.20ms |
+|UNet | option 2 (eval_metric_loss=0.10) |23M | 91.21 |18.61ms |3447.75ms |

+ 11 - 0
docs/slim/quant.md

@@ -0,0 +1,11 @@
+# Model Quantization
+
+## How it works
+Fixed-point quantization was introduced to meet the demands of low memory bandwidth, low power, low compute usage and small model storage. PaddleX provides post-training quantization: it determines the quantization scale factors using KL divergence and converts an FP32 model into an INT8 model without retraining, so a quantized model can be obtained quickly.
+
+
+## Quantizing a model with PaddleX
+PaddleX provides the `export_quant_model` interface, which quantizes a model via post_quantization and exports it in one call. See the [quantization API documentation](../apis/slim.md).
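+
+A rough sketch of the call (the model path and the argument layout here are assumptions for illustration; the linked API document is authoritative):
+
+```python
+import paddlex as pdx
+
+# load a trained model, then export an INT8 version of it;
+# the dataset supplies calibration batches for the KL-based scale factors
+model = pdx.load_model('output/mobilenetv2/best_model')  # placeholder path
+pdx.slim.export_quant_model(model, eval_dataset, save_dir='./quant_model')
+```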
+
+## Quantization performance comparison
+For performance metrics of quantized models, see the [PaddleSlim model zoo](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html)

+ 15 - 3
docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md

@@ -96,6 +96,17 @@ cmake .. \
 make
 
 ```
+**Note:** On Linux the build automatically downloads OPENCV, PaddleX-Encryption and YAML. If the build environment cannot reach the internet, download them manually:
+
+- [opencv3gcc4.8.tar.bz2](https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2)
+- [paddlex-encryption.zip](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)
+- [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
+
+After downloading opencv3gcc4.8.tar.bz2, extract it and set `OPENCV_DIR` in script/build.sh to the extracted path.
+
+After downloading paddlex-encryption.zip, extract it and set `ENCRYPTION_DIR` in script/build.sh to the extracted path.
+
+yaml-cpp.zip does not need to be extracted; in cmake/yaml.cmake, change the address in `URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` to the path of the downloaded file.
 
 After the main parameters are set in the script, run the `build` script:
  ```shell
@@ -104,8 +115,9 @@ make
 
 ### Step5: Inference and visualization
 
-Refer to [exporting an inference model](../../deploy_python.html#inference) to export the model in inference format.
-**Note: because PaddleX code is continuously updated, models with a version lower than 1.0.0 cannot be used directly for deployment for now; refer to [model version upgrade](../../upgrade_version.md) to upgrade the model version.**
+**Before loading a model, check that the model directory contains three files: `model.yml`, `__model__` and `__params__`. If it does not, refer to [exporting a model to inference format](../deploy_python.html#inference) to export the model in deployment format.**  
+
+> **Note: because PaddleX code is continuously updated, models with a version lower than 1.0.0 (the model version is recorded in the version field of model.yml) cannot be used directly for deployment for now; refer to [model version upgrade](../../upgrade_version.md) to upgrade the model version.**  

 After a successful build, the inference demo executables are `build/demo/detector`, `build/demo/classifer` and `build/demo/segmenter`; choose the one matching your model type. Their main command-line parameters are:
 
@@ -117,7 +129,7 @@ make
 | use_gpu  | Whether to use the GPU for inference; 0 or 1 (default 0) |
 | use_trt  | Whether to use TensorRT for inference; 0 or 1 (default 0) |
 | gpu_id  | GPU device ID; default 0 |
-| save_dir | Path for saving visualized results; default "output"; classifer has no such parameter |
+| save_dir | Path for saving visualized results; default "output"; **classifer has no such parameter** |
 
 ## Examples
 

+ 12 - 3
docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md

@@ -86,7 +86,14 @@ PaddlePaddle C++ inference libraries are built for different `CPU` and `CUDA` versions, and with or without Tens
 | OPENCV_DIR  | OpenCV installation path |
 | PADDLE_DIR | Path of the Paddle C++ inference library |
 
-**Note:** 1. For the `CPU` version of the inference library, untick the `value` of `WITH_GPU` 2. For the `openblas` version, untick the `value` of `WITH_MKL`
+**Note:**
+1. For the `CPU` version of the inference library, untick the `value` of `WITH_GPU`
+
+2. For the `openblas` version, untick the `value` of `WITH_MKL`
+
+3. On Windows the build automatically downloads YAML. If the build environment cannot reach the internet, download [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip) manually
+
+yaml-cpp.zip does not need to be extracted; in cmake/yaml.cmake, change the address in `URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` to the path of the downloaded file.
 
 ![step4](../../images/vs2019_step5.png)
 
@@ -99,8 +106,10 @@ PaddlePaddle C++ inference libraries are built for different `CPU` and `CUDA` versions, and with or without Tens
 
 ### Step5: Inference and visualization
 
-Refer to [exporting an inference model](../deploy_python.html#inference) to export the model in inference format.
-**Note: because PaddleX code is continuously updated, models with a version lower than 1.0.0 cannot be used directly for deployment for now; refer to [model version upgrade](../../upgrade_version.md) to upgrade the model version.**
+
+**Before loading a model, check that the model directory contains three files: `model.yml`, `__model__` and `__params__`. If it does not, refer to [exporting a model to inference format](../deploy_python.html#inference) to export the model in deployment format.**   
+
+**Note: because PaddleX code is continuously updated, models with a version lower than 1.0.0 (the model version is recorded in the version field of model.yml) cannot be used directly for deployment for now; refer to [model version upgrade](../../upgrade_version.md) to upgrade the model version.**
 
 The executables produced by the `Visual Studio 2019` build are in the `out\build\x64-Release` directory; open `cmd` and switch to that directory:
 

+ 18 - 0
new_tutorials/train/README.md

@@ -0,0 +1,18 @@
+# Tutorials: Training Models
+
+This directory collects example code for training models with PaddleX. Each script downloads its sample dataset automatically and trains on a single GPU.
+
+|Code | Model/task | Data |
+|------|--------|---------|
+|classification/mobilenetv2.py | Image classification, MobileNetV2 | Vegetable classification |
+|classification/resnet50.py | Image classification, ResNet50 | Vegetable classification |
+|detection/faster_rcnn_r50_fpn.py | Object detection, FasterRCNN | Insect detection |
+|detection/yolov3_darknet53.py | Object detection, YOLOv3 | Insect detection |
+|detection/mask_rcnn_r50_fpn.py | Instance segmentation, MaskRCNN | Xiaoduxiong sorting |
+|segmentation/deeplabv3p.py | Semantic segmentation, DeepLabV3 | Optic disc segmentation |
+|segmentation/hrnet.py | Semantic segmentation, HRNet | Optic disc segmentation |
+|segmentation/unet.py | Semantic segmentation, UNet | Optic disc segmentation |
+
+## Start training
+After installing PaddleX, start training with:
+```
+python classification/mobilenetv2.py
+```

+ 47 - 0
new_tutorials/train/classification/mobilenetv2.py

@@ -0,0 +1,47 @@
+import os
+# use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.cls import transforms
+import paddlex as pdx
+
+# download and extract the vegetable classification dataset
+veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
+pdx.utils.download_and_decompress(veg_dataset, path='./')
+
+# define the transforms for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
+train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
+eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
+
+# define the datasets used for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
+train_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/train_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=eval_transforms)
+
+# initialize the model and start training
+# training metrics can be inspected with VisualDL
+# start VisualDL with: visualdl --logdir output/mobilenetv2/vdl_log --port 8001
+# then open http://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote server, use that machine's IP instead
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#mobilenetv2
+model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    train_batch_size=32,
+    eval_dataset=eval_dataset,
+    lr_decay_epochs=[4, 6, 8],
+    learning_rate=0.025,
+    save_dir='output/mobilenetv2',
+    use_vdl=True)

+ 56 - 0
new_tutorials/train/classification/resnet50.py

@@ -0,0 +1,56 @@
+import os
+# use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+import paddle.fluid as fluid
+from paddlex.cls import transforms
+import paddlex as pdx
+
+# download and extract the vegetable classification dataset
+veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
+pdx.utils.download_and_decompress(veg_dataset, path='./')
+
+# define the transforms for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
+train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
+eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
+
+# define the datasets used for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
+train_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/train_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=eval_transforms)
+
+# PaddleX supports custom-built optimizers
+step_each_epoch = train_dataset.num_samples // 32
+learning_rate = fluid.layers.cosine_decay(
+    learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10)
+optimizer = fluid.optimizer.Momentum(
+    learning_rate=learning_rate,
+    momentum=0.9,
+    regularization=fluid.regularizer.L2Decay(4e-5))
+
+# initialize the model and start training
+# training metrics can be inspected with VisualDL
+# start VisualDL with: visualdl --logdir output/resnet50/vdl_log --port 8001
+# then open http://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote server, use that machine's IP instead
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
+model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    train_batch_size=32,
+    eval_dataset=eval_dataset,
+    optimizer=optimizer,
+    save_dir='output/resnet50',
+    use_vdl=True)

+ 49 - 0
new_tutorials/train/detection/faster_rcnn_r50_fpn.py

@@ -0,0 +1,49 @@
+import os
+# use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# download and extract the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# define the transforms for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
+train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
+eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
+
+# define the datasets used for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/train_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/val_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=eval_transforms)
+
+# initialize the model and start training
+# training metrics can be inspected with VisualDL
+# start VisualDL with: visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001
+# then open http://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote server, use that machine's IP instead
+# num_classes must include the background class, i.e. number of object classes + 1
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn
+num_classes = len(train_dataset.labels) + 1
+model = pdx.det.FasterRCNN(num_classes=num_classes)
+model.train(
+    num_epochs=12,
+    train_dataset=train_dataset,
+    train_batch_size=2,
+    eval_dataset=eval_dataset,
+    learning_rate=0.0025,
+    lr_decay_epochs=[8, 11],
+    save_dir='output/faster_rcnn_r50_fpn',
+    use_vdl=True)

+ 48 - 0
new_tutorials/train/detection/mask_rcnn_r50_fpn.py

@@ -0,0 +1,48 @@
+import os
+# use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# download and extract the Xiaoduxiong sorting dataset
+xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
+pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
+
+# define the transforms for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
+train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
+eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
+
+# define the datasets used for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection
+train_dataset = pdx.datasets.CocoDetection(
+    data_dir='xiaoduxiong_ins_det/JPEGImages',
+    ann_file='xiaoduxiong_ins_det/train.json',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.CocoDetection(
+    data_dir='xiaoduxiong_ins_det/JPEGImages',
+    ann_file='xiaoduxiong_ins_det/val.json',
+    transforms=eval_transforms)
+
+# initialize the model and start training
+# training metrics can be inspected with VisualDL
+# start VisualDL with: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
+# then open http://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote server, use that machine's IP instead
+# num_classes must include the background class, i.e. number of object classes + 1
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn
+num_classes = len(train_dataset.labels) + 1
+model = pdx.det.MaskRCNN(num_classes=num_classes)
+model.train(
+    num_epochs=12,
+    train_dataset=train_dataset,
+    train_batch_size=1,
+    eval_dataset=eval_dataset,
+    learning_rate=0.00125,
+    warmup_steps=10,
+    lr_decay_epochs=[8, 11],
+    save_dir='output/mask_rcnn_r50_fpn',
+    use_vdl=True)

+ 48 - 0
new_tutorials/train/detection/yolov3_darknet53.py

@@ -0,0 +1,48 @@
+import os
+# use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# download and extract the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# define the transforms for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
+train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608])
+eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[608, 608])
+
+# define the datasets used for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/train_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/val_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=eval_transforms)
+
+# initialize the model and start training
+# training metrics can be inspected with VisualDL
+# start VisualDL with: visualdl --logdir output/yolov3_darknet53/vdl_log --port 8001
+# then open http://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote server, use that machine's IP instead
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
+num_classes = len(train_dataset.labels)
+model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
+model.train(
+    num_epochs=270,
+    train_dataset=train_dataset,
+    train_batch_size=8,
+    eval_dataset=eval_dataset,
+    learning_rate=0.000125,
+    lr_decay_epochs=[210, 240],
+    save_dir='output/yolov3_darknet53',
+    use_vdl=True)

+ 51 - 0
new_tutorials/train/segmentation/deeplabv3p.py

@@ -0,0 +1,51 @@
+import os
+# use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# download and extract the optic disc segmentation dataset
+optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
+pdx.utils.download_and_decompress(optic_dataset, path='./')
+
+# define the transforms for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
+train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
+eval_transforms = transforms.ComposedSegTransforms(mode='eval')
+
+train_transforms.add_augmenters([
+    transforms.RandomRotate()
+])
+
+# define the datasets used for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
+train_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/train_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/val_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=eval_transforms)
+
+# initialize the model and start training
+# training metrics can be inspected with VisualDL
+# start VisualDL with: visualdl --logdir output/deeplab/vdl_log --port 8001
+# then open http://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote server, use that machine's IP instead
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
+num_classes = len(train_dataset.labels)
+model = pdx.seg.DeepLabv3p(num_classes=num_classes)
+model.train(
+    num_epochs=40,
+    train_dataset=train_dataset,
+    train_batch_size=4,
+    eval_dataset=eval_dataset,
+    learning_rate=0.01,
+    save_dir='output/deeplab',
+    use_vdl=True)

+ 47 - 0
new_tutorials/train/segmentation/hrnet.py

@@ -0,0 +1,47 @@
+import os
+# use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# download and extract the optic disc segmentation dataset
+optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
+pdx.utils.download_and_decompress(optic_dataset, path='./')
+
+# define the transforms for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
+train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
+eval_transforms = transforms.ComposedSegTransforms(mode='eval')
+
+# define the datasets used for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
+train_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/train_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/val_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=eval_transforms)
+
+# initialize the model and start training
+# training metrics can be inspected with VisualDL
+# start VisualDL with: visualdl --logdir output/hrnet/vdl_log --port 8001
+# then open http://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote server, use that machine's IP instead
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
+num_classes = len(train_dataset.labels)
+model = pdx.seg.HRNet(num_classes=num_classes)
+model.train(
+    num_epochs=20,
+    train_dataset=train_dataset,
+    train_batch_size=4,
+    eval_dataset=eval_dataset,
+    learning_rate=0.01,
+    save_dir='output/hrnet',
+    use_vdl=True)

+ 47 - 0
new_tutorials/train/segmentation/unet.py

@@ -0,0 +1,47 @@
+import os
+# 选择使用0号卡
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# download and extract the optic disc segmentation dataset
+optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
+pdx.utils.download_and_decompress(optic_dataset, path='./')
+
+# define the transforms for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
+train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
+eval_transforms = transforms.ComposedSegTransforms(mode='eval')
+
+# define the datasets used for training and validation
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
+train_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/train_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/val_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=eval_transforms)
+
+# initialize the model and start training
+# training metrics can be inspected with VisualDL
+# start VisualDL with: visualdl --logdir output/unet/vdl_log --port 8001
+# then open http://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote server, use that machine's IP instead
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet
+num_classes = len(train_dataset.labels)
+model = pdx.seg.UNet(num_classes=num_classes)
+model.train(
+    num_epochs=20,
+    train_dataset=train_dataset,
+    train_batch_size=4,
+    eval_dataset=eval_dataset,
+    learning_rate=0.01,
+    save_dir='output/unet',
+    use_vdl=True)

+ 1 - 1
paddlex/__init__.py

@@ -53,4 +53,4 @@ log_level = 2
 
 from . import interpret
 
-__version__ = '1.0.4'
+__version__ = '1.0.6'

+ 1 - 0
paddlex/cls.py

@@ -37,5 +37,6 @@ DenseNet161 = cv.models.DenseNet161
 DenseNet201 = cv.models.DenseNet201
 ShuffleNetV2 = cv.models.ShuffleNetV2
 HRNet_W18 = cv.models.HRNet_W18
+AlexNet = cv.models.AlexNet
 
 transforms = cv.transforms.cls_transforms

+ 1 - 0
paddlex/cv/models/__init__.py

@@ -35,6 +35,7 @@ from .classifier import DenseNet161
 from .classifier import DenseNet201
 from .classifier import ShuffleNetV2
 from .classifier import HRNet_W18
+from .classifier import AlexNet
 from .base import BaseAPI
 from .yolo_v3 import YOLOv3
 from .faster_rcnn import FasterRCNN

+ 9 - 13
paddlex/cv/models/base.py

@@ -221,8 +221,8 @@ class BaseAPI:
             logging.info(
                 "Load pretrain weights from {}.".format(pretrain_weights),
                 use_color=True)
-            paddlex.utils.utils.load_pretrain_weights(
-                self.exe, self.train_prog, pretrain_weights, fuse_bn)
+            paddlex.utils.utils.load_pretrain_weights(self.exe, self.train_prog,
+                                                      pretrain_weights, fuse_bn)
         # perform pruning
         if sensitivities_file is not None:
             import paddleslim
@@ -262,6 +262,7 @@ class BaseAPI:
 
         info['_Attributes']['num_classes'] = self.num_classes
         info['_Attributes']['labels'] = self.labels
+        info['_Attributes']['fixed_input_shape'] = self.fixed_input_shape
         try:
             primary_metric_key = list(self.eval_metrics.keys())[0]
             primary_metric_value = float(self.eval_metrics[primary_metric_key])
@@ -325,9 +326,7 @@ class BaseAPI:
         logging.info("Model saved in {}.".format(save_dir))
 
     def export_inference_model(self, save_dir):
-        test_input_names = [
-            var.name for var in list(self.test_inputs.values())
-        ]
+        test_input_names = [var.name for var in list(self.test_inputs.values())]
         test_outputs = list(self.test_outputs.values())
         if self.__class__.__name__ == 'MaskRCNN':
             from paddlex.utils.save import save_mask_inference_model
@@ -364,8 +363,7 @@ class BaseAPI:
 
         # marker file indicating the model was saved successfully
         open(osp.join(save_dir, '.success'), 'w').close()
-        logging.info("Model for inference deploy saved in {}.".format(
-            save_dir))
+        logging.info("Model for inference deploy saved in {}.".format(save_dir))
 
     def train_loop(self,
                    num_epochs,
@@ -489,13 +487,11 @@ class BaseAPI:
                         eta = ((num_epochs - i) * total_num_steps - step - 1
                                ) * avg_step_time
                     if time_eval_one_epoch is not None:
-                        eval_eta = (
-                            total_eval_times - i // save_interval_epochs
-                        ) * time_eval_one_epoch
+                        eval_eta = (total_eval_times - i // save_interval_epochs
+                                    ) * time_eval_one_epoch
                     else:
-                        eval_eta = (
-                            total_eval_times - i // save_interval_epochs
-                        ) * total_num_steps_eval * avg_step_time
+                        eval_eta = (total_eval_times - i // save_interval_epochs
+                                    ) * total_num_steps_eval * avg_step_time
                     eta_str = seconds_to_hms(eta + eval_eta)
 
                     logging.info(

+ 41 - 9
paddlex/cv/models/classifier.py

@@ -48,12 +48,13 @@ class BaseClassifier(BaseAPI):
         self.fixed_input_shape = None
 
     def build_net(self, mode='train'):
+        if self.__class__.__name__ == "AlexNet":
+            assert self.fixed_input_shape is not None, "In AlexNet, input_shape should be defined, e.g. model = paddlex.cls.AlexNet(num_classes=1000, input_shape=[224, 224])"
         if self.fixed_input_shape is not None:
             input_shape = [
                 None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
             ]
-            image = fluid.data(
-                dtype='float32', shape=input_shape, name='image')
+            image = fluid.data(dtype='float32', shape=input_shape, name='image')
         else:
             image = fluid.data(
                 dtype='float32', shape=[None, 3, None, None], name='image')
@@ -81,7 +82,8 @@ class BaseClassifier(BaseAPI):
             del outputs['loss']
         return inputs, outputs
 
-    def default_optimizer(self, learning_rate, lr_decay_epochs, lr_decay_gamma,
+    def default_optimizer(self, learning_rate, warmup_steps, warmup_start_lr,
+                          lr_decay_epochs, lr_decay_gamma,
                           num_steps_each_epoch):
         boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
         values = [
@@ -90,6 +92,24 @@ class BaseClassifier(BaseAPI):
         ]
         lr_decay = fluid.layers.piecewise_decay(
             boundaries=boundaries, values=values)
+        if warmup_steps > 0:
+            if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
+                logging.error(
+                    "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
+                    exit=False)
+                logging.error(
+                    "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
+                    exit=False)
+                logging.error(
+                    "warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
+                    format(lr_decay_epochs[0] * num_steps_each_epoch,
+                           warmup_steps // num_steps_each_epoch))
+
+            lr_decay = fluid.layers.linear_lr_warmup(
+                learning_rate=lr_decay,
+                warmup_steps=warmup_steps,
+                start_lr=warmup_start_lr,
+                end_lr=learning_rate)
         optimizer = fluid.optimizer.Momentum(
             lr_decay,
             momentum=0.9,
@@ -107,6 +127,8 @@ class BaseClassifier(BaseAPI):
               pretrain_weights='IMAGENET',
               optimizer=None,
               learning_rate=0.025,
+              warmup_steps=0,
+              warmup_start_lr=0.0,
               lr_decay_epochs=[30, 60, 90],
               lr_decay_gamma=0.1,
               use_vdl=False,
@@ -129,6 +151,8 @@ class BaseClassifier(BaseAPI):
             optimizer (paddle.fluid.optimizer): Optimizer. When None, the default optimizer is used:
                 a fluid.layers.piecewise_decay schedule with the fluid.optimizer.Momentum method.
             learning_rate (float): Initial learning rate of the default optimizer. Defaults to 0.025.
+            warmup_steps (int): Number of steps needed for the learning rate to rise from warmup_start_lr to the configured learning_rate. Defaults to 0
+            warmup_start_lr (float): Starting learning rate during the warmup phase. Defaults to 0.0
             lr_decay_epochs (list): Epochs at which the default optimizer's learning rate decays. Defaults to [30, 60, 90].
             lr_decay_gamma (float): Learning-rate decay factor of the default optimizer. Defaults to 0.1.
             use_vdl (bool): Whether to use VisualDL for visualization. Defaults to False.
@@ -149,6 +173,8 @@ class BaseClassifier(BaseAPI):
             num_steps_each_epoch = train_dataset.num_samples // train_batch_size
             optimizer = self.default_optimizer(
                 learning_rate=learning_rate,
+                warmup_steps=warmup_steps,
+                warmup_start_lr=warmup_start_lr,
                 lr_decay_epochs=lr_decay_epochs,
                 lr_decay_gamma=lr_decay_gamma,
                 num_steps_each_epoch=num_steps_each_epoch)
@@ -193,8 +219,7 @@ class BaseClassifier(BaseAPI):
           tuple (metrics, eval_details): When return_details is True, an extra dict is returned,
               with keys 'true_labels' and 'pred_scores': the ground-truth class ids and the per-class prediction scores.
         """
-        self.arrange_transforms(
-            transforms=eval_dataset.transforms, mode='eval')
+        self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
         data_generator = eval_dataset.generator(
             batch_size=batch_size, drop_last=False)
         k = min(5, self.num_classes)
@@ -206,9 +231,8 @@ class BaseClassifier(BaseAPI):
                 self.test_prog).with_data_parallel(
                     share_vars_from=self.parallel_train_prog)
         batch_size_each_gpu = self._get_single_card_bs(batch_size)
-        logging.info(
-            "Start to evaluating(total_samples={}, total_steps={})...".format(
-                eval_dataset.num_samples, total_steps))
+        logging.info("Start to evaluating(total_samples={}, total_steps={})...".
+                     format(eval_dataset.num_samples, total_steps))
         for step, data in tqdm.tqdm(
                 enumerate(data_generator()), total=total_steps):
             images = np.array([d[0] for d in data]).astype('float32')
@@ -264,7 +288,8 @@ class BaseClassifier(BaseAPI):
             im = self.test_transforms(img_file)
         result = self.exe.run(self.test_prog,
                               feed={'image': im},
-                              fetch_list=list(self.test_outputs.values()))
+                              fetch_list=list(self.test_outputs.values()),
+                              use_program_cache=True)
         pred_label = np.argsort(result[0][0])[::-1][:true_topk]
         res = [{
             'category_id': l,
@@ -404,3 +429,10 @@ class HRNet_W18(BaseClassifier):
     def __init__(self, num_classes=1000):
         super(HRNet_W18, self).__init__(
             model_name='HRNet_W18', num_classes=num_classes)
+
+
+class AlexNet(BaseClassifier):
+    def __init__(self, num_classes=1000, input_shape=None):
+        super(AlexNet, self).__init__(
+            model_name='AlexNet', num_classes=num_classes)
+        self.fixed_input_shape = input_shape
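+
+# A hypothetical usage sketch (mirroring the assertion in build_net above):
+# AlexNet's fully-connected head fixes the input size, so input_shape must be
+# given explicitly, e.g.
+#   model = paddlex.cls.AlexNet(num_classes=1000, input_shape=[224, 224])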

+ 4 - 2
paddlex/cv/models/deeplabv3p.py

@@ -337,7 +337,8 @@ class DeepLabv3p(BaseAPI):
             for d in data:
                 padding_label = np.zeros(
                     (1, im_h, im_w)).astype('int64') + self.ignore_index
-                padding_label[:, :im_h, :im_w] = d[1]
+                _, label_h, label_w = d[1].shape
+                padding_label[:, :label_h, :label_w] = d[1]
                 labels.append(padding_label)
             labels = np.array(labels)
 
@@ -398,7 +399,8 @@ class DeepLabv3p(BaseAPI):
         im = np.expand_dims(im, axis=0)
         result = self.exe.run(self.test_prog,
                               feed={'image': im},
-                              fetch_list=list(self.test_outputs.values()))
+                              fetch_list=list(self.test_outputs.values()),
+                              use_program_cache=True)
         pred = result[0]
         pred = np.squeeze(pred).astype('uint8')
         logit = result[1]
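
The padding fix matters whenever a ground-truth label is smaller than the batch canvas: the old code indexed the slice with the canvas size (im_h, im_w) instead of the label's own size, which breaks on non-uniform label shapes. A standalone numpy sketch of the corrected behavior (sizes are made up for illustration):

    import numpy as np

    ignore_index = 255
    im_h, im_w = 512, 512                            # canvas size for the batch
    label = np.zeros((1, 480, 500), dtype='int64')   # a smaller ground-truth label

    # Fill the canvas with ignore_index, then copy the label into the top-left corner.
    padding_label = np.zeros((1, im_h, im_w), dtype='int64') + ignore_index
    _, label_h, label_w = label.shape
    padding_label[:, :label_h, :label_w] = label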

+ 16 - 10
paddlex/cv/models/faster_rcnn.py

@@ -138,8 +138,16 @@ class FasterRCNN(BaseAPI):
                           lr_decay_epochs, lr_decay_gamma,
                           num_steps_each_epoch):
         if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
-            raise Exception("warmup_steps should less than {}".format(
-                lr_decay_epochs[0] * num_steps_each_epoch))
+            logging.error(
+                "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch",
+                exit=False)
+            logging.error(
+                "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
+                exit=False)
+            logging.error(
+                "warmup_steps should be no greater than {}, or lr_decay_epochs[0] should be greater than {}; please modify 'lr_decay_epochs' or 'warmup_steps' in the train() function".
+                format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
+                       // num_steps_each_epoch))
         boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
         values = [(lr_decay_gamma**i) * learning_rate
                   for i in range(len(lr_decay_epochs) + 1)]
@@ -282,8 +290,7 @@ class FasterRCNN(BaseAPI):
                 eval_details is a dict with the key 'bbox', whose value is a list of predictions, each made up of image id,
                 predicted box class id, predicted box coordinates, and predicted box score; 'gt' holds the ground-truth box information.
         """
-        self.arrange_transforms(
-            transforms=eval_dataset.transforms, mode='eval')
+        self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
         if metric is None:
             if hasattr(self, 'metric') and self.metric is not None:
                 metric = self.metric
@@ -302,14 +309,12 @@ class FasterRCNN(BaseAPI):
             logging.warning(
                 "Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1."
             )
-        dataset = eval_dataset.generator(
-            batch_size=batch_size, drop_last=False)
+        dataset = eval_dataset.generator(batch_size=batch_size, drop_last=False)
 
         total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
         results = list()
-        logging.info(
-            "Start to evaluating(total_samples={}, total_steps={})...".format(
-                eval_dataset.num_samples, total_steps))
+        logging.info("Start to evaluating(total_samples={}, total_steps={})...".
+                     format(eval_dataset.num_samples, total_steps))
         for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps):
             images = np.array([d[0] for d in data]).astype('float32')
             im_infos = np.array([d[1] for d in data]).astype('float32')
@@ -389,7 +394,8 @@ class FasterRCNN(BaseAPI):
                                    'im_shape': im_shape
                                },
                                fetch_list=list(self.test_outputs.values()),
-                               return_numpy=False)
+                               return_numpy=False,
+                               use_program_cache=True)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
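
The three-part error above replaces a bare exception and spells out the constraint: warmup must end before the first decay boundary, warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch. A small arithmetic sketch of the schedule this code builds (all numbers are hypothetical):

    # 800 training samples, batch size 8 -> 100 steps per epoch
    num_steps_each_epoch = 800 // 8
    learning_rate, lr_decay_gamma = 0.0025, 0.1
    lr_decay_epochs = [8, 11]

    boundaries = [e * num_steps_each_epoch for e in lr_decay_epochs]  # [800, 1100]
    values = [learning_rate * lr_decay_gamma ** i
              for i in range(len(lr_decay_epochs) + 1)]               # [0.0025, 0.00025, 2.5e-05]

    warmup_steps = 500   # valid: 500 <= 800
    # warmup_steps = 900 would trip the logging.error branch above.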

+ 11 - 2
paddlex/cv/models/load_model.py

@@ -41,7 +41,16 @@ def load_model(model_dir, fixed_input_shape=None):
     if 'model_name' in info['_init_params']:
         del info['_init_params']['model_name']
     model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params'])
+
     model.fixed_input_shape = fixed_input_shape
+    if '_Attributes' in info:
+        if 'fixed_input_shape' in info['_Attributes']:
+            fixed_input_shape = info['_Attributes']['fixed_input_shape']
+            if fixed_input_shape is not None:
+                logging.info("Model already has fixed_input_shape with {}".
+                             format(fixed_input_shape))
+                model.fixed_input_shape = fixed_input_shape
+
     if status == "Normal" or \
             status == "Prune" or status == "fluid.save":
         startup_prog = fluid.Program()
@@ -88,8 +97,8 @@ def load_model(model_dir, fixed_input_shape=None):
                 model.model_type, info['Transforms'], info['BatchTransforms'])
             model.eval_transforms = copy.deepcopy(model.test_transforms)
         else:
-            model.test_transforms = build_transforms(
-                model.model_type, info['Transforms'], to_rgb)
+            model.test_transforms = build_transforms(model.model_type,
+                                                     info['Transforms'], to_rgb)
             model.eval_transforms = copy.deepcopy(model.test_transforms)
 
     if '_Attributes' in info:
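
With this change, a fixed_input_shape recorded in the saved model's _Attributes overrides the one passed to load_model. A hedged sketch of both paths (the model directory is a placeholder):

    import paddlex as pdx

    # Model saved without a fixed shape: the argument takes effect.
    model = pdx.load_model('output/mobilenetv2/best_model',
                           fixed_input_shape=[224, 224])

    # Model exported with a fixed shape baked in: the stored shape wins, and
    # load_model logs "Model already has fixed_input_shape with [...]".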

+ 16 - 10
paddlex/cv/models/mask_rcnn.py

@@ -97,8 +97,16 @@ class MaskRCNN(FasterRCNN):
                           lr_decay_epochs, lr_decay_gamma,
                           num_steps_each_epoch):
         if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
-            raise Exception("warmup_step should less than {}".format(
-                lr_decay_epochs[0] * num_steps_each_epoch))
+            logging.error(
+                "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch",
+                exit=False)
+            logging.error(
+                "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
+                exit=False)
+            logging.error(
+                "warmup_steps should be no greater than {}, or lr_decay_epochs[0] should be greater than {}; please modify 'lr_decay_epochs' or 'warmup_steps' in the train() function".
+                format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
+                       // num_steps_each_epoch))
         boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
         values = [(lr_decay_gamma**i) * learning_rate
                   for i in range(len(lr_decay_epochs) + 1)]
@@ -244,8 +252,7 @@ class MaskRCNN(FasterRCNN):
                 predicted box coordinates, and predicted box score; 'mask' maps to a list of predicted region results, each made up of image id,
                 predicted region class id, predicted region coordinates, and predicted region score; 'gt' holds the ground-truth box and region information.
         """
-        self.arrange_transforms(
-            transforms=eval_dataset.transforms, mode='eval')
+        self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
         if metric is None:
             if hasattr(self, 'metric') and self.metric is not None:
                 metric = self.metric
@@ -266,9 +273,8 @@ class MaskRCNN(FasterRCNN):
 
         total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
         results = list()
-        logging.info(
-            "Start to evaluating(total_samples={}, total_steps={})...".format(
-                eval_dataset.num_samples, total_steps))
+        logging.info("Start to evaluating(total_samples={}, total_steps={})...".
+                     format(eval_dataset.num_samples, total_steps))
         for step, data in tqdm.tqdm(
                 enumerate(data_generator()), total=total_steps):
             images = np.array([d[0] for d in data]).astype('float32')
@@ -310,8 +316,7 @@ class MaskRCNN(FasterRCNN):
                     zip(['bbox_map', 'segm_map'],
                         [ap_stats[0][1], ap_stats[1][1]]))
             else:
-                metrics = OrderedDict(
-                    zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
+                metrics = OrderedDict(zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
         elif metric == 'COCO':
             if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
                                                                   np.ndarray):
@@ -357,7 +362,8 @@ class MaskRCNN(FasterRCNN):
                                    'im_shape': im_shape
                                },
                                fetch_list=list(self.test_outputs.values()),
-                               return_numpy=False)
+                               return_numpy=False,
+                               use_program_cache=True)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)

+ 38 - 8
paddlex/cv/models/slim/prune.py

@@ -66,16 +66,15 @@ def sensitivity(program,
             progress = "%.2f%%" % (progress * 100)
             logging.info(
                 "Total evaluate iters={}, current={}, progress={}, eta={}".
-                format(
-                    total_evaluate_iters, current_iter, progress,
-                    seconds_to_hms(
-                        int(cost * (total_evaluate_iters - current_iter)))),
+                format(total_evaluate_iters, current_iter, progress,
+                       seconds_to_hms(
+                           int(cost * (total_evaluate_iters - current_iter)))),
                 use_color=True)
             current_iter += 1
 
             pruner = Pruner()
-            logging.info("sensitive - param: {}; ratios: {}".format(
-                name, ratio))
+            logging.info("sensitive - param: {}; ratios: {}".format(name,
+                                                                    ratio))
             pruned_program, param_backup, _ = pruner.prune(
                 program=graph.program,
                 scope=scope,
@@ -87,8 +86,8 @@ def sensitivity(program,
                 param_backup=True)
             pruned_metric = eval_func(pruned_program)
             loss = (baseline - pruned_metric) / baseline
-            logging.info("pruned param: {}; {}; loss={}".format(
-                name, ratio, loss))
+            logging.info("pruned param: {}; {}; loss={}".format(name, ratio,
+                                                                loss))
 
             sensitivities[name][ratio] = loss
 
@@ -116,6 +115,21 @@ def channel_prune(program, prune_names, prune_ratios, place, only_graph=False):
     Returns:
         paddle.fluid.Program: The pruned Program.
     """
+    prog_var_shape_dict = {}
+    for var in program.list_vars():
+        try:
+            prog_var_shape_dict[var.name] = var.shape
+        except Exception:
+            pass
+    index = 0
+    for param, ratio in zip(prune_names, prune_ratios):
+        origin_num = prog_var_shape_dict[param][0]
+        pruned_num = int(round(origin_num * ratio))
+        while origin_num == pruned_num:
+            ratio -= 0.1
+            pruned_num = int(round(origin_num * (ratio)))
+            prune_ratios[index] = ratio
+        index += 1
     scope = fluid.global_scope()
     pruner = Pruner()
     program, _, _ = pruner.prune(
@@ -221,6 +235,9 @@ def cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8):
 
             where ``weight_0`` is a convolution kernel name; ``sensitivities['weight_0']`` is a dict whose keys are prune ratios and whose values are sensitivity losses.
     """
+    if os.path.exists(save_file):
+        os.remove(save_file)
+
     prune_names = get_prune_params(model)
 
     def eval_for_prune(program):
@@ -284,6 +301,19 @@ def cal_model_size(program, place, sensitivities_file, eval_metric_loss=0.05):
     """
     prune_params_ratios = get_params_ratios(sensitivities_file,
                                             eval_metric_loss)
+    prog_var_shape_dict = {}
+    for var in program.list_vars():
+        try:
+            prog_var_shape_dict[var.name] = var.shape
+        except Exception:
+            pass
+    for param, ratio in prune_params_ratios.items():
+        origin_num = prog_var_shape_dict[param][0]
+        pruned_num = int(round(origin_num * ratio))
+        while origin_num == pruned_num:
+            ratio -= 0.1
+            pruned_num = int(round(origin_num * (ratio)))
+            prune_params_ratios[param] = ratio
     prune_program = channel_prune(
         program,
         list(prune_params_ratios.keys()),
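
The new loop guards against a ratio that would prune every output channel of a kernel: pruned_num == origin_num would leave the layer with zero channels, so the ratio is stepped down by 0.1 until at least one channel survives. A worked instance:

    origin_num = 4                                 # output channels of a small conv kernel
    ratio = 0.95
    pruned_num = int(round(origin_num * ratio))    # round(3.8) = 4 -> would prune everything
    while origin_num == pruned_num:
        ratio -= 0.1
        pruned_num = int(round(origin_num * ratio))
    print(ratio, pruned_num)                       # ratio ~= 0.85, pruned_num = 3 -> one channel kept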

+ 5 - 2
paddlex/cv/models/slim/prune_config.py

@@ -142,13 +142,16 @@ def get_prune_params(model):
     program = model.test_prog
     if model_type.startswith('ResNet') or \
             model_type.startswith('DenseNet') or \
-            model_type.startswith('DarkNet'):
+            model_type.startswith('DarkNet') or \
+            model_type.startswith('AlexNet'):
         for block in program.blocks:
             for param in block.all_parameters():
                 pd_var = fluid.global_scope().find_var(param.name)
                 pd_param = pd_var.get_tensor()
                 if len(np.array(pd_param).shape) == 4:
                     prune_names.append(param.name)
+        if model_type == 'AlexNet':
+            prune_names.remove('conv5_weights')
     elif model_type == "MobileNetV1":
         prune_names.append("conv1_weights")
         for param in program.global_block().all_parameters():
@@ -162,7 +165,7 @@ def get_prune_params(model):
                 continue
             prune_names.append(param.name)
     elif model_type.startswith("MobileNetV3"):
-        if model_type == 'MobileNetV3_small':
+        if model_type.startswith('MobileNetV3_small'):
             expand_prune_id = [3, 4]
         else:
             expand_prune_id = [2, 3, 4, 8, 9, 11]

+ 8 - 4
paddlex/cv/models/utils/pretrain_weights.py

@@ -70,6 +70,8 @@ image_pretrain = {
     'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W60_C_pretrained.tar',
     'HRNet_W64':
     'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar',
+    'AlexNet':
+    'http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar'
 }
 
 coco_pretrain = {
@@ -99,10 +101,12 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
                 backbone = 'DetResNet50'
         assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format(
             backbone)
-        #        url = image_pretrain[backbone]
-        #        fname = osp.split(url)[-1].split('.')[0]
-        #        paddlex.utils.download_and_decompress(url, path=new_save_dir)
-        #        return osp.join(new_save_dir, fname)
+
+        #        if backbone == 'AlexNet':
+        #            url = image_pretrain[backbone]
+        #            fname = osp.split(url)[-1].split('.')[0]
+        #            paddlex.utils.download_and_decompress(url, path=new_save_dir)
+        #            return osp.join(new_save_dir, fname)
         try:
             hub.download(backbone, save_path=new_save_dir)
         except Exception as e:

+ 15 - 8
paddlex/cv/models/yolo_v3.py

@@ -128,8 +128,16 @@ class YOLOv3(BaseAPI):
                           lr_decay_epochs, lr_decay_gamma,
                           num_steps_each_epoch):
         if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
-            raise Exception("warmup_steps should less than {}".format(
-                lr_decay_epochs[0] * num_steps_each_epoch))
+            logging.error(
+                "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch",
+                exit=False)
+            logging.error(
+                "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
+                exit=False)
+            logging.error(
+                "warmup_steps should be no greater than {}, or lr_decay_epochs[0] should be greater than {}; please modify 'lr_decay_epochs' or 'warmup_steps' in the train() function".
+                format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
+                       // num_steps_each_epoch))
         boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
         values = [(lr_decay_gamma**i) * learning_rate
                   for i in range(len(lr_decay_epochs) + 1)]
@@ -277,8 +285,7 @@ class YOLOv3(BaseAPI):
                 eval_details is a dict with the key 'bbox', whose value is a list of predictions, each made up of image id,
                 predicted box class id, predicted box coordinates, and predicted box score; 'gt' holds the ground-truth box information.
         """
-        self.arrange_transforms(
-            transforms=eval_dataset.transforms, mode='eval')
+        self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
         if metric is None:
             if hasattr(self, 'metric') and self.metric is not None:
                 metric = self.metric
@@ -298,9 +305,8 @@ class YOLOv3(BaseAPI):
 
         data_generator = eval_dataset.generator(
             batch_size=batch_size, drop_last=False)
-        logging.info(
-            "Start to evaluating(total_samples={}, total_steps={})...".format(
-                eval_dataset.num_samples, total_steps))
+        logging.info("Start to evaluating(total_samples={}, total_steps={})...".
+                     format(eval_dataset.num_samples, total_steps))
         for step, data in tqdm.tqdm(
                 enumerate(data_generator()), total=total_steps):
             images = np.array([d[0] for d in data])
@@ -363,7 +369,8 @@ class YOLOv3(BaseAPI):
                                feed={'image': im,
                                      'im_size': im_size},
                                fetch_list=list(self.test_outputs.values()),
-                               return_numpy=False)
+                               return_numpy=False,
+                               use_program_cache=True)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
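
use_program_cache=True is a standard fluid.Executor.run option that caches the compiled program between calls, so the repeated single-image predict() calls this change targets skip program preparation. A sketch of the loop it speeds up (model path and file names are placeholders):

    import paddlex as pdx

    model = pdx.load_model('output/yolov3_darknet53/best_model')
    for img_file in ['img0.jpg', 'img1.jpg', 'img2.jpg']:
        result = model.predict(img_file)   # each call reuses the cached program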

+ 6 - 0
paddlex/cv/nets/__init__.py

@@ -24,6 +24,7 @@ from .xception import Xception
 from .densenet import DenseNet
 from .shufflenet_v2 import ShuffleNetV2
 from .hrnet import HRNet
+from .alexnet import AlexNet
 
 
 def resnet18(input, num_classes=1000):
@@ -153,3 +154,8 @@ def shufflenetv2(input, num_classes=1000):
 def hrnet_w18(input, num_classes=1000):
     model = HRNet(width=18, num_classes=num_classes)
     return model(input)
+
+
+def alexnet(input, num_classes=1000):
+    model = AlexNet(num_classes=num_classes)
+    return model(input)
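
A quick sketch of driving the new factory directly, mirroring the other helpers in this module (the 224x224 input shape is the conventional AlexNet size, not something this file enforces):

    import paddle.fluid as fluid
    from paddlex.cv.nets import alexnet

    image = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
    logits = alexnet(image, num_classes=1000)   # fc8 output, shape [None, 1000]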

+ 170 - 0
paddlex/cv/nets/alexnet.py

@@ -0,0 +1,170 @@
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+
+
+class AlexNet():
+    def __init__(self, num_classes=1000):
+        assert num_classes is not None, "In AlexNet, num_classes cannot be None"
+        self.num_classes = num_classes
+
+    def __call__(self, input):
+        stdv = 1.0 / math.sqrt(input.shape[1] * 11 * 11)
+        layer_name = [
+            "conv1", "conv2", "conv3", "conv4", "conv5", "fc6", "fc7", "fc8"
+        ]
+        conv1 = fluid.layers.conv2d(
+            input=input,
+            num_filters=64,
+            filter_size=11,
+            stride=4,
+            padding=2,
+            groups=1,
+            act='relu',
+            bias_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[0] + "_offset"),
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[0] + "_weights"))
+        pool1 = fluid.layers.pool2d(
+            input=conv1,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=0,
+            pool_type='max')
+
+        stdv = 1.0 / math.sqrt(pool1.shape[1] * 5 * 5)
+        conv2 = fluid.layers.conv2d(
+            input=pool1,
+            num_filters=192,
+            filter_size=5,
+            stride=1,
+            padding=2,
+            groups=1,
+            act='relu',
+            bias_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[1] + "_offset"),
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[1] + "_weights"))
+        pool2 = fluid.layers.pool2d(
+            input=conv2,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=0,
+            pool_type='max')
+
+        stdv = 1.0 / math.sqrt(pool2.shape[1] * 3 * 3)
+        conv3 = fluid.layers.conv2d(
+            input=pool2,
+            num_filters=384,
+            filter_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            act='relu',
+            bias_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[2] + "_offset"),
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[2] + "_weights"))
+
+        stdv = 1.0 / math.sqrt(conv3.shape[1] * 3 * 3)
+        conv4 = fluid.layers.conv2d(
+            input=conv3,
+            num_filters=256,
+            filter_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            act='relu',
+            bias_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[3] + "_offset"),
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[3] + "_weights"))
+
+        stdv = 1.0 / math.sqrt(conv4.shape[1] * 3 * 3)
+        conv5 = fluid.layers.conv2d(
+            input=conv4,
+            num_filters=256,
+            filter_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            act='relu',
+            bias_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[4] + "_offset"),
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[4] + "_weights"))
+        pool5 = fluid.layers.pool2d(
+            input=conv5,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=0,
+            pool_type='max')
+
+        drop6 = fluid.layers.dropout(x=pool5, dropout_prob=0.5)
+        stdv = 1.0 / math.sqrt(drop6.shape[1] * drop6.shape[2] *
+                               drop6.shape[3] * 1.0)
+
+        fc6 = fluid.layers.fc(
+            input=drop6,
+            size=4096,
+            act='relu',
+            bias_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[5] + "_offset"),
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[5] + "_weights"))
+        drop7 = fluid.layers.dropout(x=fc6, dropout_prob=0.5)
+        stdv = 1.0 / math.sqrt(drop7.shape[1] * 1.0)
+
+        fc7 = fluid.layers.fc(
+            input=drop7,
+            size=4096,
+            act='relu',
+            bias_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[6] + "_offset"),
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[6] + "_weights"))
+
+        stdv = 1.0 / math.sqrt(fc7.shape[1] * 1.0)
+        out = fluid.layers.fc(
+            input=fc7,
+            size=self.num_classes,
+            bias_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[7] + "_offset"),
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name=layer_name[7] + "_weights"))
+        return out
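
Every layer above uses the same uniform fan-in initializer: stdv = 1 / sqrt(fan_in), where fan_in is in_channels * k * k for a convolution and the flattened input size for an fc layer. For conv1, for instance:

    import math

    # conv1 sees 3 input channels and an 11x11 kernel
    stdv = 1.0 / math.sqrt(3 * 11 * 11)   # ~= 0.0525
    # weights and biases are then drawn from Uniform(-stdv, +stdv)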

+ 2 - 2
paddlex/cv/nets/hrnet.py

@@ -71,7 +71,7 @@ class HRNet(object):
         self.end_points = []
         return
 
-    def net(self, input, class_dim=1000):
+    def net(self, input):
         width = self.width
         channels_2, channels_3, channels_4 = self.channels[width]
         num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
@@ -125,7 +125,7 @@ class HRNet(object):
             stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
             out = fluid.layers.fc(
                 input=pool,
-                size=class_dim,
+                size=self.num_classes,
                 param_attr=ParamAttr(
                     name='fc_weights',
                     initializer=fluid.initializer.Uniform(-stdv, stdv)),

+ 6 - 1
paddlex/cv/transforms/cls_transforms.py

@@ -18,6 +18,7 @@ import random
 import os.path as osp
 import numpy as np
 from PIL import Image, ImageEnhance
+import paddlex.utils.logging as logging
 
 
 class ClsTransform:
@@ -96,7 +97,11 @@ class Compose(ClsTransform):
         if not isinstance(augmenters, list):
             raise Exception(
                 "augmenters should be list type in func add_augmenters()")
-        self.transforms = augmenters + self.transforms.transforms
+        transform_names = [type(x).__name__ for x in self.transforms]
+        for aug in augmenters:
+            if type(aug).__name__ in transform_names:
+                logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
+        self.transforms = augmenters + self.transforms
 
 
 class RandomCrop(ClsTransform):
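
The new duplicate check means an augmenter already present in the composed pipeline must not be passed to add_augmenters() again. A hedged usage sketch, assuming ComposedClsTransforms keeps RandomHorizontalFlip in its default training pipeline, as the composed classification transforms conventionally do:

    from paddlex.cls import transforms

    train_transforms = transforms.ComposedClsTransforms(
        mode='train', crop_size=[224, 224])

    # Fine: RandomDistort is not in the default composed pipeline.
    train_transforms.add_augmenters([transforms.RandomDistort()])

    # Would hit the logging.error branch above:
    # train_transforms.add_augmenters([transforms.RandomHorizontalFlip()])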

+ 8 - 3
paddlex/cv/transforms/det_transforms.py

@@ -27,6 +27,7 @@ from PIL import Image, ImageEnhance
 from .imgaug_support import execute_imgaug
 from .ops import *
 from .box_utils import *
+import paddlex.utils.logging as logging
 
 
 class DetTransform:
@@ -156,7 +157,11 @@ class Compose(DetTransform):
         if not isinstance(augmenters, list):
             raise Exception(
                 "augmenters should be list type in func add_augmenters()")
-        self.transforms = augmenters + self.transforms.transforms
+        transform_names = [type(x).__name__ for x in self.transforms]
+        for aug in augmenters:
+            if type(aug).__name__ in transform_names:
+                logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
+        self.transforms = augmenters + self.transforms
 
 
 class ResizeByShort(DetTransform):
@@ -1303,7 +1308,7 @@ class ComposedRCNNTransforms(Compose):
         super(ComposedRCNNTransforms, self).__init__(transforms)
 
 
-class ComposedYOLOTransforms(Compose):
+class ComposedYOLOv3Transforms(Compose):
     """YOLOv3模型的图像预处理流程,具体如下,
         训练阶段:
         1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
@@ -1358,4 +1363,4 @@ class ComposedYOLOTransforms(Compose):
                     target_size=width, interp='CUBIC'), Normalize(
                         mean=mean, std=std)
             ]
-        super(ComposedYOLOTransforms, self).__init__(transforms)
+        super(ComposedYOLOv3Transforms, self).__init__(transforms)
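
Callers that imported ComposedYOLOTransforms must switch to the new name. A minimal sketch, assuming the constructor keeps the mode/shape parameters used elsewhere in this file (608x608 is a conventional YOLOv3 input size; sides should stay multiples of 32):

    from paddlex.det import transforms

    train_transforms = transforms.ComposedYOLOv3Transforms(
        mode='train', shape=[608, 608])
    eval_transforms = transforms.ComposedYOLOv3Transforms(
        mode='eval', shape=[608, 608])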

+ 7 - 2
paddlex/cv/transforms/seg_transforms.py

@@ -21,6 +21,7 @@ import numpy as np
 from PIL import Image
 import cv2
 from collections import OrderedDict
+import paddlex.utils.logging as logging
 
 
 class SegTransform:
@@ -112,7 +113,11 @@ class Compose(SegTransform):
         if not isinstance(augmenters, list):
             raise Exception(
                 "augmenters should be list type in func add_augmenters()")
-        self.transforms = augmenters + self.transforms.transforms
+        transform_names = [type(x).__name__ for x in self.transforms]
+        for aug in augmenters:
+            if type(aug).__name__ in transform_names:
+                logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
+        self.transforms = augmenters + self.transforms
 
 
 class RandomHorizontalFlip(SegTransform):
@@ -1127,6 +1132,6 @@ class ComposedSegTransforms(Compose):
             ]
         else:
             # transforms for evaluation/prediction
-            transforms = [Resize(512), Normalize(mean=mean, std=std)]
+            transforms = [Normalize(mean=mean, std=std)]
 
         super(ComposedSegTransforms, self).__init__(transforms)
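
Note the behavioral change in the last hunk: evaluation and prediction no longer resize to 512, so images keep their original resolution and are only normalized. A hedged sketch of building the updated pipeline, assuming the remaining constructor arguments keep their defaults:

    from paddlex.seg import transforms

    # After this change: Normalize only, no fixed 512 resize.
    eval_transforms = transforms.ComposedSegTransforms(mode='eval')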

+ 24 - 8
paddlex/interpret/core/_session_preparation.py

@@ -20,6 +20,7 @@ import numpy as np
 from paddle.fluid.param_attr import ParamAttr
 from paddlex.interpret.as_data_reader.readers import preprocess_image
 
+
 def gen_user_home():
     if "HOME" in os.environ:
         home_path = os.environ["HOME"]
@@ -34,10 +35,20 @@ def paddle_get_fc_weights(var_name="fc_0.w_0"):
 
 
 def paddle_resize(extracted_features, outsize):
-    resized_features = fluid.layers.resize_bilinear(extracted_features, outsize)
+    resized_features = fluid.layers.resize_bilinear(extracted_features,
+                                                    outsize)
     return resized_features
 
 
+def get_precomputed_normlime_weights():
+    root_path = gen_user_home()
+    root_path = osp.join(root_path, '.paddlex')
+    h_pre_models = osp.join(root_path, "pre_models")
+    normlime_weights_file = osp.join(
+        h_pre_models, "normlime_weights_imagenet_resnet50vc.npy")
+    return np.load(normlime_weights_file, allow_pickle=True).item()
+
+
 def compute_features_for_kmeans(data_content):
     root_path = gen_user_home()
     root_path = osp.join(root_path, '.paddlex')
@@ -47,6 +58,7 @@ def compute_features_for_kmeans(data_content):
             os.makedirs(root_path)
         url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
         pdx.utils.download_and_decompress(url, path=root_path)
+
     def conv_bn_layer(input,
                       num_filters,
                       filter_size,
@@ -55,7 +67,7 @@ def compute_features_for_kmeans(data_content):
                       act=None,
                       name=None,
                       is_test=True,
-                      global_name=''):
+                      global_name='for_kmeans_'):
         conv = fluid.layers.conv2d(
             input=input,
             num_filters=num_filters,
@@ -79,14 +91,14 @@ def compute_features_for_kmeans(data_content):
             bias_attr=ParamAttr(global_name + bn_name + '_offset'),
             moving_mean_name=global_name + bn_name + '_mean',
             moving_variance_name=global_name + bn_name + '_variance',
-            use_global_stats=is_test
-        )
+            use_global_stats=is_test)
 
     startup_prog = fluid.default_startup_program().clone(for_test=True)
     prog = fluid.Program()
     with fluid.program_guard(prog, startup_prog):
         with fluid.unique_name.guard():
-            image_op = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
+            image_op = fluid.data(
+                name='image', shape=[None, 3, 224, 224], dtype='float32')
 
             conv = conv_bn_layer(
                 input=image_op,
@@ -110,7 +122,8 @@ def compute_features_for_kmeans(data_content):
                 act='relu',
                 name='conv1_3')
             extracted_features = conv
-            resized_features = fluid.layers.resize_bilinear(extracted_features, image_op.shape[2:])
+            resized_features = fluid.layers.resize_bilinear(extracted_features,
+                                                            image_op.shape[2:])
 
     gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
     place = fluid.CUDAPlace(gpu_id)
@@ -119,7 +132,10 @@ def compute_features_for_kmeans(data_content):
     exe.run(startup_prog)
     fluid.io.load_persistables(exe, h_pre_models, prog)
 
-    images = preprocess_image(data_content)  # transpose to [N, 3, H, W], scaled to [0.0, 1.0]
-    result = exe.run(prog, fetch_list=[resized_features], feed={'image': images})
+    images = preprocess_image(
+        data_content)  # transpose to [N, 3, H, W], scaled to [0.0, 1.0]
+    result = exe.run(prog,
+                     fetch_list=[resized_features],
+                     feed={'image': images})
 
     return result[0][0]

+ 7 - 11
paddlex/interpret/core/interpretation.py

@@ -20,12 +20,10 @@ class Interpretation(object):
     """
     Base class for all interpretation algorithms.
     """
-    def __init__(self, interpretation_algorithm_name, predict_fn, label_names, **kwargs):
-        supported_algorithms = {
-            'cam': CAM,
-            'lime': LIME,
-            'normlime': NormLIME
-        }
+
+    def __init__(self, interpretation_algorithm_name, predict_fn, label_names,
+                 **kwargs):
+        supported_algorithms = {'cam': CAM, 'lime': LIME, 'normlime': NormLIME}
 
         self.algorithm_name = interpretation_algorithm_name.lower()
         assert self.algorithm_name in supported_algorithms.keys()
@@ -33,19 +31,17 @@ class Interpretation(object):
 
         # initialization for the interpretation algorithm.
         self.algorithm = supported_algorithms[self.algorithm_name](
-            self.predict_fn, label_names, **kwargs
-        )
+            self.predict_fn, label_names, **kwargs)
 
-    def interpret(self, data_, visualization=True, save_to_disk=True, save_dir='./tmp'):
+    def interpret(self, data_, visualization=True, save_dir='./'):
         """
 
         Args:
             data_: data_ can be a path or numpy.ndarray.
             visualization: whether to show using matplotlib.
-            save_to_disk: whether to save the figure in local disk.
             save_dir: directory to save the figure to.
 
         Returns:
 
         """
-        return self.algorithm.interpret(data_, visualization, save_to_disk, save_dir)
+        return self.algorithm.interpret(data_, visualization, save_dir)
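
With save_to_disk gone, saving is now implied by save_dir alone. A hedged sketch of the updated call, wiring the class shown above; predict_fn and label_names are placeholder stand-ins for what the model being explained would supply:

    from paddlex.interpret.core.interpretation import Interpretation

    # Hypothetical stand-ins; real values come from the classification model.
    label_names = ['class_0', 'class_1']
    predict_fn = lambda images: None   # must return per-class probabilities in practice

    # 'lime' is one of the supported algorithm names ('cam', 'lime', 'normlime').
    interpretation = Interpretation('lime', predict_fn, label_names)
    interpretation.interpret('test.jpg', visualization=False, save_dir='./output')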

+ 316 - 106
paddlex/interpret/core/interpretation_algorithms.py

@@ -23,7 +23,6 @@ from .normlime_base import combine_normlime_and_lime, get_feature_for_kmeans, lo
 from paddlex.interpret.as_data_reader.readers import read_image
 import paddlex.utils.logging as logging
 
-
 import cv2
 
 
@@ -66,25 +65,27 @@ class CAM(object):
 
         fc_weights = paddle_get_fc_weights()
         feature_maps = result[1]
-        
+
         l = pred_label[0]
         ln = l
         if self.label_names is not None:
             ln = self.label_names[l]
 
         prob_str = "%.3f" % (probability[pred_label[0]])
-        logging.info("predicted result: {} with probability {}.".format(ln, prob_str))
+        logging.info("predicted result: {} with probability {}.".format(
+            ln, prob_str))
         return feature_maps, fc_weights
 
-    def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
+    def interpret(self, data_, visualization=True, save_outdir=None):
         feature_maps, fc_weights = self.preparation_cam(data_)
-        cam = get_cam(self.image, feature_maps, fc_weights, self.predicted_label)
+        cam = get_cam(self.image, feature_maps, fc_weights,
+                      self.predicted_label)
 
-        if visualization or save_to_disk:
+        if visualization or save_outdir is not None:
             import matplotlib.pyplot as plt
             from skimage.segmentation import mark_boundaries
             l = self.labels[0]
-            ln = l 
+            ln = l
             if self.label_names is not None:
                 ln = self.label_names[l]
 
@@ -93,7 +94,8 @@ class CAM(object):
             ncols = 2
 
             plt.close()
-            f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
+            f, axes = plt.subplots(
+                nrows, ncols, figsize=(psize * ncols, psize * nrows))
             for ax in axes.ravel():
                 ax.axis("off")
             axes = axes.ravel()
@@ -104,8 +106,7 @@ class CAM(object):
             axes[1].imshow(cam)
             axes[1].set_title("CAM")
 
-        if save_to_disk and save_outdir is not None:
-            os.makedirs(save_outdir, exist_ok=True)
+        if save_outdir is not None:
             save_fig(data_, save_outdir, 'cam')
 
         if visualization:
@@ -115,7 +116,11 @@ class CAM(object):
 
 
 class LIME(object):
-    def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50):
+    def __init__(self,
+                 predict_fn,
+                 label_names,
+                 num_samples=3000,
+                 batch_size=50):
         """
         LIME wrapper. See lime_base.py for the detailed LIME implementation.
         Args:
@@ -154,31 +159,37 @@ class LIME(object):
         self.predicted_probability = probability[pred_label[0]]
         self.image = image_show[0]
         self.labels = pred_label
-        
+
         l = pred_label[0]
         ln = l
         if self.label_names is not None:
             ln = self.label_names[l]
-            
+
         prob_str = "%.3f" % (probability[pred_label[0]])
-        logging.info("predicted result: {} with probability {}.".format(ln, prob_str))
+        logging.info("predicted result: {} with probability {}.".format(
+            ln, prob_str))
 
         end = time.time()
         algo = lime_base.LimeImageInterpreter()
-        interpreter = algo.interpret_instance(self.image, self.predict_fn, self.labels, 0,
-                                              num_samples=self.num_samples, batch_size=self.batch_size)
+        interpreter = algo.interpret_instance(
+            self.image,
+            self.predict_fn,
+            self.labels,
+            0,
+            num_samples=self.num_samples,
+            batch_size=self.batch_size)
         self.lime_interpreter = interpreter
         logging.info('lime time: ' + str(time.time() - end) + 's.')
 
-    def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
+    def interpret(self, data_, visualization=True, save_outdir=None):
         if self.lime_interpreter is None:
             self.preparation_lime(data_)
 
-        if visualization or save_to_disk:
+        if visualization or save_outdir is not None:
             import matplotlib.pyplot as plt
             from skimage.segmentation import mark_boundaries
             l = self.labels[0]
-            ln = l 
+            ln = l
             if self.label_names is not None:
                 ln = self.label_names[l]
 
@@ -188,7 +199,8 @@ class LIME(object):
             ncols = len(weights_choices)
 
             plt.close()
-            f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
+            f, axes = plt.subplots(
+                nrows, ncols, figsize=(psize * ncols, psize * nrows))
             for ax in axes.ravel():
                 ax.axis("off")
             axes = axes.ravel()
@@ -196,20 +208,24 @@ class LIME(object):
             prob_str = "{%.3f}" % (self.predicted_probability)
             axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
 
-            axes[1].imshow(mark_boundaries(self.image, self.lime_interpreter.segments))
+            axes[1].imshow(
+                mark_boundaries(self.image, self.lime_interpreter.segments))
             axes[1].set_title("superpixel segmentation")
 
             # LIME visualization
             for i, w in enumerate(weights_choices):
-                num_to_show = auto_choose_num_features_to_show(self.lime_interpreter, l, w)
+                num_to_show = auto_choose_num_features_to_show(
+                    self.lime_interpreter, l, w)
                 temp, mask = self.lime_interpreter.get_image_and_mask(
-                    l, positive_only=False, hide_rest=False, num_features=num_to_show
-                )
+                    l,
+                    positive_only=True,
+                    hide_rest=False,
+                    num_features=num_to_show)
                 axes[ncols + i].imshow(mark_boundaries(temp, mask))
-                axes[ncols + i].set_title("label {}, first {} superpixels".format(ln, num_to_show))
+                axes[ncols + i].set_title(
+                    "label {}, first {} superpixels".format(ln, num_to_show))
 
-        if save_to_disk and save_outdir is not None:
-            os.makedirs(save_outdir, exist_ok=True)
+        if save_outdir is not None:
             save_fig(data_, save_outdir, 'lime', self.num_samples)
 
         if visualization:
@@ -218,9 +234,196 @@ class LIME(object):
         return
 
 
+class NormLIMEStandard(object):
+    def __init__(self,
+                 predict_fn,
+                 label_names,
+                 num_samples=3000,
+                 batch_size=50,
+                 kmeans_model_for_normlime=None,
+                 normlime_weights=None):
+        root_path = gen_user_home()
+        root_path = osp.join(root_path, '.paddlex')
+        h_pre_models = osp.join(root_path, "pre_models")
+        if not osp.exists(h_pre_models):
+            if not osp.exists(root_path):
+                os.makedirs(root_path)
+            url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
+            pdx.utils.download_and_decompress(url, path=root_path)
+        h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
+        if kmeans_model_for_normlime is None:
+            try:
+                self.kmeans_model = load_kmeans_model(h_pre_models_kmeans)
+            except:
+                raise ValueError(
+                    "NormLIME needs the KMeans model, where we provided a default one in "
+                    "pre_models/kmeans_model.pkl.")
+        else:
+            logging.debug("Warning: It is *strongly* suggested to use the \
+            default KMeans model in pre_models/kmeans_model.pkl. \
+            Using another one will change the final result.")
+            self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime)
+
+        self.num_samples = num_samples
+        self.batch_size = batch_size
+
+        try:
+            self.normlime_weights = np.load(
+                normlime_weights, allow_pickle=True).item()
+        except:
+            self.normlime_weights = None
+            logging.debug(
+                "Warning: not find the correct precomputed Normlime result.")
+
+        self.predict_fn = predict_fn
+
+        self.labels = None
+        self.image = None
+        self.label_names = label_names
+
+    def predict_cluster_labels(self, feature_map, segments):
+        X = get_feature_for_kmeans(feature_map, segments)
+        try:
+            cluster_labels = self.kmeans_model.predict(X)
+        except AttributeError:
+            from sklearn.metrics import pairwise_distances_argmin_min
+            cluster_labels, _ = pairwise_distances_argmin_min(
+                X, self.kmeans_model.cluster_centers_)
+        return cluster_labels
+
+    def predict_using_normlime_weights(self, pred_labels,
+                                       predicted_cluster_labels):
+        # global weights
+        g_weights = {y: [] for y in pred_labels}
+        for y in pred_labels:
+            cluster_weights_y = self.normlime_weights.get(y, {})
+            g_weights[y] = [(i, cluster_weights_y.get(k, 0.0))
+                            for i, k in enumerate(predicted_cluster_labels)]
+
+            g_weights[y] = sorted(
+                g_weights[y], key=lambda x: np.abs(x[1]), reverse=True)
+
+        return g_weights
+
+    def preparation_normlime(self, data_):
+        self._lime = LIME(self.predict_fn, self.label_names, self.num_samples,
+                          self.batch_size)
+        self._lime.preparation_lime(data_)
+
+        image_show = read_image(data_)
+
+        self.predicted_label = self._lime.predicted_label
+        self.predicted_probability = self._lime.predicted_probability
+        self.image = image_show[0]
+        self.labels = self._lime.labels
+        logging.info('performing NormLIME operations ...')
+
+        cluster_labels = self.predict_cluster_labels(
+            compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
+            self._lime.lime_interpreter.segments)
+
+        g_weights = self.predict_using_normlime_weights(self.labels,
+                                                        cluster_labels)
+
+        return g_weights
+
+    def interpret(self, data_, visualization=True, save_outdir=None):
+        if self.normlime_weights is None:
+            raise ValueError(
+                "Not find the correct precomputed NormLIME result. \n"
+                "\t Try to call compute_normlime_weights() first or load the correct path."
+            )
+
+        g_weights = self.preparation_normlime(data_)
+        lime_weights = self._lime.lime_interpreter.local_weights
+
+        if visualization or save_outdir is not None:
+            import matplotlib.pyplot as plt
+            from skimage.segmentation import mark_boundaries
+            l = self.labels[0]
+            ln = l
+            if self.label_names is not None:
+                ln = self.label_names[l]
+
+            psize = 5
+            nrows = 4
+            weights_choices = [0.6, 0.7, 0.75, 0.8, 0.85]
+            nums_to_show = []
+            ncols = len(weights_choices)
+
+            plt.close()
+            f, axes = plt.subplots(
+                nrows, ncols, figsize=(psize * ncols, psize * nrows))
+            for ax in axes.ravel():
+                ax.axis("off")
+
+            axes = axes.ravel()
+            axes[0].imshow(self.image)
+            prob_str = "{%.3f}" % (self.predicted_probability)
+            axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
+
+            axes[1].imshow(
+                mark_boundaries(self.image,
+                                self._lime.lime_interpreter.segments))
+            axes[1].set_title("superpixel segmentation")
+
+            # LIME visualization
+            for i, w in enumerate(weights_choices):
+                num_to_show = auto_choose_num_features_to_show(
+                    self._lime.lime_interpreter, l, w)
+                nums_to_show.append(num_to_show)
+                temp, mask = self._lime.lime_interpreter.get_image_and_mask(
+                    l,
+                    positive_only=False,
+                    hide_rest=False,
+                    num_features=num_to_show)
+                axes[ncols + i].imshow(mark_boundaries(temp, mask))
+                axes[ncols + i].set_title("LIME: first {} superpixels".format(
+                    num_to_show))
+
+            # NormLIME visualization
+            self._lime.lime_interpreter.local_weights = g_weights
+            for i, num_to_show in enumerate(nums_to_show):
+                temp, mask = self._lime.lime_interpreter.get_image_and_mask(
+                    l,
+                    positive_only=False,
+                    hide_rest=False,
+                    num_features=num_to_show)
+                axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask))
+                axes[ncols * 2 + i].set_title(
+                    "NormLIME: first {} superpixels".format(num_to_show))
+
+            # NormLIME*LIME visualization
+            combined_weights = combine_normlime_and_lime(lime_weights,
+                                                         g_weights)
+            self._lime.lime_interpreter.local_weights = combined_weights
+            for i, num_to_show in enumerate(nums_to_show):
+                temp, mask = self._lime.lime_interpreter.get_image_and_mask(
+                    l,
+                    positive_only=False,
+                    hide_rest=False,
+                    num_features=num_to_show)
+                axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask))
+                axes[ncols * 3 + i].set_title(
+                    "Combined: first {} superpixels".format(num_to_show))
+
+            self._lime.lime_interpreter.local_weights = lime_weights
+
+        if save_outdir is not None:
+            save_fig(data_, save_outdir, 'normlime', self.num_samples)
+
+        if visualization:
+            plt.show()
+
+
 class NormLIME(object):
-    def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50,
-                 kmeans_model_for_normlime=None, normlime_weights=None):
+    def __init__(self,
+                 predict_fn,
+                 label_names,
+                 num_samples=3000,
+                 batch_size=50,
+                 kmeans_model_for_normlime=None,
+                 normlime_weights=None):
         root_path = gen_user_home()
         root_path = osp.join(root_path, '.paddlex')
         h_pre_models = osp.join(root_path, "pre_models")
@@ -234,8 +437,9 @@ class NormLIME(object):
             try:
                 self.kmeans_model = load_kmeans_model(h_pre_models_kmeans)
             except:
-                raise ValueError("NormLIME needs the KMeans model, where we provided a default one in "
-                                 "pre_models/kmeans_model.pkl.")
+                raise ValueError(
+                    "NormLIME needs the KMeans model, where we provided a default one in "
+                    "pre_models/kmeans_model.pkl.")
         else:
             logging.debug("Warning: It is *strongly* suggested to use the \
             default KMeans model in pre_models/kmeans_model.pkl. \
@@ -246,10 +450,12 @@ class NormLIME(object):
         self.batch_size = batch_size
 
         try:
-            self.normlime_weights = np.load(normlime_weights, allow_pickle=True).item()
+            self.normlime_weights = np.load(
+                normlime_weights, allow_pickle=True).item()
         except:
             self.normlime_weights = None
-            logging.debug("Warning: not find the correct precomputed Normlime result.")
+            logging.debug(
+                "Warning: not find the correct precomputed Normlime result.")
 
         self.predict_fn = predict_fn
 
@@ -263,30 +469,27 @@ class NormLIME(object):
             cluster_labels = self.kmeans_model.predict(X)
         except AttributeError:
             from sklearn.metrics import pairwise_distances_argmin_min
-            cluster_labels, _ = pairwise_distances_argmin_min(X, self.kmeans_model.cluster_centers_)
+            cluster_labels, _ = pairwise_distances_argmin_min(
+                X, self.kmeans_model.cluster_centers_)
         return cluster_labels
 
-    def predict_using_normlime_weights(self, pred_labels, predicted_cluster_labels):
+    def predict_using_normlime_weights(self, pred_labels,
+                                       predicted_cluster_labels):
         # global weights
         g_weights = {y: [] for y in pred_labels}
         for y in pred_labels:
             cluster_weights_y = self.normlime_weights.get(y, {})
-            g_weights[y] = [
-                (i, cluster_weights_y.get(k, 0.0)) for i, k in enumerate(predicted_cluster_labels)
-            ]
+            g_weights[y] = [(i, cluster_weights_y.get(k, 0.0))
+                            for i, k in enumerate(predicted_cluster_labels)]
 
-            g_weights[y] = sorted(g_weights[y],
-                                  key=lambda x: np.abs(x[1]), reverse=True)
+            g_weights[y] = sorted(
+                g_weights[y], key=lambda x: np.abs(x[1]), reverse=True)
 
         return g_weights
 
     def preparation_normlime(self, data_):
-        self._lime = LIME(
-            self.predict_fn,
-            self.label_names,
-            self.num_samples,
-            self.batch_size
-        )
+        self._lime = LIME(self.predict_fn, self.label_names, self.num_samples,
+                          self.batch_size)
         self._lime.preparation_lime(data_)
 
         image_show = read_image(data_)
@@ -298,22 +501,25 @@ class NormLIME(object):
         logging.info('performing NormLIME operations ...')
 
         cluster_labels = self.predict_cluster_labels(
-            compute_features_for_kmeans(image_show).transpose((1, 2, 0)), self._lime.lime_interpreter.segments
-        )
+            compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
+            self._lime.lime_interpreter.segments)
 
-        g_weights = self.predict_using_normlime_weights(self.labels, cluster_labels)
+        g_weights = self.predict_using_normlime_weights(self.labels,
+                                                        cluster_labels)
 
         return g_weights
 
-    def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
+    def interpret(self, data_, visualization=True, save_outdir=None):
         if self.normlime_weights is None:
-            raise ValueError("Not find the correct precomputed NormLIME result. \n"
-                             "\t Try to call compute_normlime_weights() first or load the correct path.")
+            raise ValueError(
+                "Not find the correct precomputed NormLIME result. \n"
+                "\t Try to call compute_normlime_weights() first or load the correct path."
+            )
 
         g_weights = self.preparation_normlime(data_)
         lime_weights = self._lime.lime_interpreter.local_weights
 
-        if visualization or save_to_disk:
+        if visualization or save_outdir is not None:
             import matplotlib.pyplot as plt
             from skimage.segmentation import mark_boundaries
             l = self.labels[0]
@@ -328,7 +534,8 @@ class NormLIME(object):
             ncols = len(weights_choices)
 
             plt.close()
-            f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
+            f, axes = plt.subplots(
+                nrows, ncols, figsize=(psize * ncols, psize * nrows))
             for ax in axes.ravel():
                 ax.axis("off")
 
@@ -337,64 +544,83 @@ class NormLIME(object):
             prob_str = "{%.3f}" % (self.predicted_probability)
             axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
 
-            axes[1].imshow(mark_boundaries(self.image, self._lime.lime_interpreter.segments))
+            axes[1].imshow(
+                mark_boundaries(self.image,
+                                self._lime.lime_interpreter.segments))
             axes[1].set_title("superpixel segmentation")
 
             # LIME visualization
             for i, w in enumerate(weights_choices):
-                num_to_show = auto_choose_num_features_to_show(self._lime.lime_interpreter, l, w)
+                num_to_show = auto_choose_num_features_to_show(
+                    self._lime.lime_interpreter, l, w)
                 nums_to_show.append(num_to_show)
                 temp, mask = self._lime.lime_interpreter.get_image_and_mask(
-                    l, positive_only=False, hide_rest=False, num_features=num_to_show
-                )
+                    l,
+                    positive_only=True,
+                    hide_rest=False,
+                    num_features=num_to_show)
                 axes[ncols + i].imshow(mark_boundaries(temp, mask))
-                axes[ncols + i].set_title("LIME: first {} superpixels".format(num_to_show))
+                axes[ncols + i].set_title("LIME: first {} superpixels".format(
+                    num_to_show))
 
             # NormLIME visualization
             self._lime.lime_interpreter.local_weights = g_weights
             for i, num_to_show in enumerate(nums_to_show):
                 temp, mask = self._lime.lime_interpreter.get_image_and_mask(
-                    l, positive_only=False, hide_rest=False, num_features=num_to_show
-                )
+                    l,
+                    positive_only=True,
+                    hide_rest=False,
+                    num_features=num_to_show)
                 axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask))
-                axes[ncols * 2 + i].set_title("NormLIME: first {} superpixels".format(num_to_show))
+                axes[ncols * 2 + i].set_title(
+                    "NormLIME: first {} superpixels".format(num_to_show))
 
             # NormLIME*LIME visualization
-            combined_weights = combine_normlime_and_lime(lime_weights, g_weights)
+            combined_weights = combine_normlime_and_lime(lime_weights,
+                                                         g_weights)
+
             self._lime.lime_interpreter.local_weights = combined_weights
             for i, num_to_show in enumerate(nums_to_show):
                 temp, mask = self._lime.lime_interpreter.get_image_and_mask(
-                    l, positive_only=False, hide_rest=False, num_features=num_to_show
-                )
+                    l,
+                    positive_only=True,
+                    hide_rest=False,
+                    num_features=num_to_show)
                 axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask))
-                axes[ncols * 3 + i].set_title("Combined: first {} superpixels".format(num_to_show))
+                axes[ncols * 3 + i].set_title(
+                    "Combined: first {} superpixels".format(num_to_show))
 
             self._lime.lime_interpreter.local_weights = lime_weights
 
-        if save_to_disk and save_outdir is not None:
-            os.makedirs(save_outdir, exist_ok=True)
+        if save_outdir is not None:
             save_fig(data_, save_outdir, 'normlime', self.num_samples)
 
         if visualization:
             plt.show()
 
 
-def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show):
+def auto_choose_num_features_to_show(lime_interpreter, label,
+                                     percentage_to_show):
     segments = lime_interpreter.segments
     lime_weights = lime_interpreter.local_weights[label]
-    num_pixels_threshold_in_a_sp = segments.shape[0] * segments.shape[1] // len(np.unique(segments)) // 8
+    num_pixels_threshold_in_a_sp = segments.shape[0] * segments.shape[
+        1] // len(np.unique(segments)) // 8
 
     # l1 norm with filtered weights.
-    used_weights = [(tuple_w[0], tuple_w[1]) for i, tuple_w in enumerate(lime_weights) if tuple_w[1] > 0]
+    used_weights = [(tuple_w[0], tuple_w[1])
+                    for i, tuple_w in enumerate(lime_weights)
+                    if tuple_w[1] > 0]
     norm = np.sum([tuple_w[1] for i, tuple_w in enumerate(used_weights)])
-    normalized_weights = [(tuple_w[0], tuple_w[1] / norm) for i, tuple_w in enumerate(lime_weights)]
+    normalized_weights = [(tuple_w[0], tuple_w[1] / norm)
+                          for i, tuple_w in enumerate(lime_weights)]
 
     a = 0.0
     n = 0
     for i, tuple_w in enumerate(normalized_weights):
         if tuple_w[1] < 0:
             continue
-        if len(np.where(segments == tuple_w[0])[0]) < num_pixels_threshold_in_a_sp:
+        if len(np.where(segments == tuple_w[0])[
+                0]) < num_pixels_threshold_in_a_sp:
             continue
 
         a += tuple_w[1]
@@ -406,12 +632,18 @@ def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show
         return 5
 
     if n == 0:
-        return auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show-0.1)
+        return auto_choose_num_features_to_show(lime_interpreter, label,
+                                                percentage_to_show - 0.1)
 
     return n
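
Read in isolation, the selection rule above normalizes the positive LIME weights and counts superpixels, skipping very small ones, until their cumulative share exceeds `percentage_to_show`. A standalone sketch of the same rule, where `weights` (hypothetical `(segment_id, weight)` pairs, sorted by magnitude) and `segment_sizes` stand in for the interpreter state:

```python
def choose_num_features(weights, segment_sizes, percentage_to_show,
                        min_pixels):
    positive = [(s, w) for s, w in weights if w > 0]
    norm = sum(w for _, w in positive) or 1.0
    shown, acc = 0, 0.0
    for s, w in positive:
        if segment_sizes[s] < min_pixels:  # skip tiny superpixels
            continue
        acc += w / norm
        shown += 1
        if acc > percentage_to_show:
            break
    return shown
```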
 
 
-def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam_max=None):
+def get_cam(image_show,
+            feature_maps,
+            fc_weights,
+            label_index,
+            cam_min=None,
+            cam_max=None):
     _, nc, h, w = feature_maps.shape
 
     cam = feature_maps * fc_weights[:, label_index].reshape(1, nc, 1, 1)
@@ -425,7 +657,8 @@ def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam
     cam = cam - cam_min
     cam = cam / cam_max
     cam = np.uint8(255 * cam)
-    cam_img = cv2.resize(cam, image_show.shape[0:2], interpolation=cv2.INTER_LINEAR)
+    cam_img = cv2.resize(
+        cam, image_show.shape[0:2], interpolation=cv2.INTER_LINEAR)
 
     heatmap = cv2.applyColorMap(np.uint8(255 * cam_img), cv2.COLORMAP_JET)
     heatmap = np.float32(heatmap)
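
The CAM itself is a class-weighted sum of feature maps followed by min-max scaling; the remaining lines blend a JET heatmap with the input image. A compact sketch of just the core computation, assuming `feature_maps` shaped (1, C, H, W) and `fc_weights` shaped (C, num_classes):

```python
import numpy as np

def raw_cam(feature_maps, fc_weights, label_index):
    _, nc, h, w = feature_maps.shape
    # Weight each channel by its FC weight for the target class, then sum.
    cam = feature_maps * fc_weights[:, label_index].reshape(1, nc, 1, 1)
    cam = cam.sum((0, 1))  # -> (H, W)
    cam = (cam - cam.min()) / max(cam.max() - cam.min(), 1e-12)
    return np.uint8(255 * cam)  # ready for cv2.applyColorMap
```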
@@ -437,34 +670,11 @@ def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam
 
 def save_fig(data_, save_outdir, algorithm_name, num_samples=3000):
     import matplotlib.pyplot as plt
-    if isinstance(data_, str):
-        if algorithm_name == 'cam':
-            f_out = "{}_{}.png".format(algorithm_name, data_.split('/')[-1])
-        else:
-            f_out = "{}_{}_s{}.png".format(algorithm_name, data_.split('/')[-1], num_samples)
-        plt.savefig(
-            os.path.join(save_outdir, f_out)
-        )
+    if algorithm_name == 'cam':
+        f_out = "{}_{}.png".format(algorithm_name, data_.split('/')[-1])
     else:
-        n = 0
-        if algorithm_name == 'cam':
-            f_out = 'cam-{}.png'.format(n)
-        else:
-            f_out = '{}_s{}-{}.png'.format(algorithm_name, num_samples, n)
-        while os.path.exists(
-                os.path.join(save_outdir, f_out)
-        ):
-            n += 1
-            if algorithm_name == 'cam':
-                f_out = 'cam-{}.png'.format(n)
-            else:
-                f_out = '{}_s{}-{}.png'.format(algorithm_name, num_samples, n)
-            continue
-        plt.savefig(
-            os.path.join(
-                save_outdir, f_out
-            )
-        )
-    logging.info('The image of intrepretation result save in {}'.format(os.path.join(
-                save_outdir, f_out
-            )))
+        f_out = "{}_{}_s{}.png".format(save_outdir, algorithm_name,
+                                       num_samples)
+
+    plt.savefig(f_out)
+    logging.info('The image of interpretation result is saved in {}'.format(f_out))

+ 104 - 68
paddlex/interpret/core/lime_base.py

@@ -27,7 +27,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 The code in this file (lime_base.py) is modified from https://github.com/marcotcr/lime.
 """
 
-
 import numpy as np
 import scipy as sp
 
@@ -39,10 +38,8 @@ import paddlex.utils.logging as logging
 
 class LimeBase(object):
     """Class for learning a locally linear sparse model from perturbed data"""
-    def __init__(self,
-                 kernel_fn,
-                 verbose=False,
-                 random_state=None):
+
+    def __init__(self, kernel_fn, verbose=False, random_state=None):
         """Init function
 
         Args:
@@ -72,15 +69,14 @@ class LimeBase(object):
         """
         from sklearn.linear_model import lars_path
         x_vector = weighted_data
-        alphas, _, coefs = lars_path(x_vector,
-                                     weighted_labels,
-                                     method='lasso',
-                                     verbose=False)
+        alphas, _, coefs = lars_path(
+            x_vector, weighted_labels, method='lasso', verbose=False)
         return alphas, coefs
 
     def forward_selection(self, data, labels, weights, num_features):
         """Iteratively adds features to the model"""
-        clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state)
+        clf = Ridge(
+            alpha=0, fit_intercept=True, random_state=self.random_state)
         used_features = []
         for _ in range(min(num_features, data.shape[1])):
             max_ = -100000000
@@ -88,11 +84,13 @@ class LimeBase(object):
             for feature in range(data.shape[1]):
                 if feature in used_features:
                     continue
-                clf.fit(data[:, used_features + [feature]], labels,
+                clf.fit(data[:, used_features + [feature]],
+                        labels,
                         sample_weight=weights)
-                score = clf.score(data[:, used_features + [feature]],
-                                  labels,
-                                  sample_weight=weights)
+                score = clf.score(
+                    data[:, used_features + [feature]],
+                    labels,
+                    sample_weight=weights)
                 if score > max_:
                     best = feature
                     max_ = score
@@ -108,8 +106,8 @@ class LimeBase(object):
         elif method == 'forward_selection':
             return self.forward_selection(data, labels, weights, num_features)
         elif method == 'highest_weights':
-            clf = Ridge(alpha=0.01, fit_intercept=True,
-                        random_state=self.random_state)
+            clf = Ridge(
+                alpha=0.01, fit_intercept=True, random_state=self.random_state)
             clf.fit(data, labels, sample_weight=weights)
 
             coef = clf.coef_
@@ -125,7 +123,8 @@ class LimeBase(object):
                     nnz_indexes = argsort_data[::-1]
                     indices = weighted_data.indices[nnz_indexes]
                     num_to_pad = num_features - sdata
-                    indices = np.concatenate((indices, np.zeros(num_to_pad, dtype=indices.dtype)))
+                    indices = np.concatenate((indices, np.zeros(
+                        num_to_pad, dtype=indices.dtype)))
                     indices_set = set(indices)
                     pad_counter = 0
                     for i in range(data.shape[1]):
@@ -135,7 +134,8 @@ class LimeBase(object):
                             if pad_counter >= num_to_pad:
                                 break
                 else:
-                    nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
+                    nnz_indexes = argsort_data[sdata - num_features:sdata][::
+                                                                           -1]
                     indices = weighted_data.indices[nnz_indexes]
                 return indices
             else:
@@ -146,13 +146,13 @@ class LimeBase(object):
                     reverse=True)
                 return np.array([x[0] for x in feature_weights[:num_features]])
         elif method == 'lasso_path':
-            weighted_data = ((data - np.average(data, axis=0, weights=weights))
-                             * np.sqrt(weights[:, np.newaxis]))
-            weighted_labels = ((labels - np.average(labels, weights=weights))
-                               * np.sqrt(weights))
+            weighted_data = ((data - np.average(
+                data, axis=0, weights=weights)) *
+                             np.sqrt(weights[:, np.newaxis]))
+            weighted_labels = ((labels - np.average(
+                labels, weights=weights)) * np.sqrt(weights))
             nonzero = range(weighted_data.shape[1])
-            _, coefs = self.generate_lars_path(weighted_data,
-                                               weighted_labels)
+            _, coefs = self.generate_lars_path(weighted_data, weighted_labels)
             for i in range(len(coefs.T) - 1, 0, -1):
                 nonzero = coefs.T[i].nonzero()[0]
                 if len(nonzero) <= num_features:
@@ -164,8 +164,8 @@ class LimeBase(object):
                 n_method = 'forward_selection'
             else:
                 n_method = 'highest_weights'
-            return self.feature_selection(data, labels, weights,
-                                          num_features, n_method)
+            return self.feature_selection(data, labels, weights, num_features,
+                                          n_method)
 
     def interpret_instance_with_data(self,
                                      neighborhood_data,
@@ -214,30 +214,31 @@ class LimeBase(object):
         weights = self.kernel_fn(distances)
         labels_column = neighborhood_labels[:, label]
         used_features = self.feature_selection(neighborhood_data,
-                                               labels_column,
-                                               weights,
-                                               num_features,
-                                               feature_selection)
+                                               labels_column, weights,
+                                               num_features, feature_selection)
         if model_regressor is None:
-            model_regressor = Ridge(alpha=1, fit_intercept=True,
-                                    random_state=self.random_state)
+            model_regressor = Ridge(
+                alpha=1, fit_intercept=True, random_state=self.random_state)
         easy_model = model_regressor
         easy_model.fit(neighborhood_data[:, used_features],
-                       labels_column, sample_weight=weights)
+                       labels_column,
+                       sample_weight=weights)
         prediction_score = easy_model.score(
             neighborhood_data[:, used_features],
-            labels_column, sample_weight=weights)
+            labels_column,
+            sample_weight=weights)
 
-        local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1))
+        local_pred = easy_model.predict(neighborhood_data[0, used_features]
+                                        .reshape(1, -1))
 
         if self.verbose:
             logging.info('Intercept' + str(easy_model.intercept_))
             logging.info('Prediction_local' + str(local_pred))
             logging.info('Right:' + str(neighborhood_labels[0, label]))
-        return (easy_model.intercept_,
-                sorted(zip(used_features, easy_model.coef_),
-                       key=lambda x: np.abs(x[1]), reverse=True),
-                prediction_score, local_pred)
+        return (easy_model.intercept_, sorted(
+            zip(used_features, easy_model.coef_),
+            key=lambda x: np.abs(x[1]),
+            reverse=True), prediction_score, local_pred)
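
The local surrogate fit above is ordinary weighted ridge regression over the selected features; stripped of bookkeeping it amounts to the following (taking `data`, `labels`, `weights`, and `used_features` as given):

```python
import numpy as np
from sklearn.linear_model import Ridge

def fit_local_surrogate(data, labels, weights, used_features):
    model = Ridge(alpha=1, fit_intercept=True)
    model.fit(data[:, used_features], labels, sample_weight=weights)
    # Coefficients sorted by magnitude are the per-feature explanations.
    coefs = sorted(zip(used_features, model.coef_),
                   key=lambda x: np.abs(x[1]), reverse=True)
    return model.intercept_, coefs
```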
 
 
 class ImageInterpretation(object):
@@ -254,8 +255,13 @@ class ImageInterpretation(object):
         self.local_weights = {}
         self.local_pred = None
 
-    def get_image_and_mask(self, label, positive_only=True, negative_only=False, hide_rest=False,
-                           num_features=5, min_weight=0.):
+    def get_image_and_mask(self,
+                           label,
+                           positive_only=True,
+                           negative_only=False,
+                           hide_rest=False,
+                           num_features=5,
+                           min_weight=0.):
         """Init function.
 
         Args:
@@ -279,7 +285,9 @@ class ImageInterpretation(object):
         if label not in self.local_weights:
             raise KeyError('Label not in interpretation')
         if positive_only & negative_only:
-            raise ValueError("Positive_only and negative_only cannot be true at the same time.")
+            raise ValueError(
+                "Positive_only and negative_only cannot be true at the same time."
+            )
         segments = self.segments
         image = self.image
         local_weights_label = self.local_weights[label]
@@ -289,14 +297,20 @@ class ImageInterpretation(object):
         else:
             temp = self.image.copy()
         if positive_only:
-            fs = [x[0] for x in local_weights_label
-                  if x[1] > 0 and x[1] > min_weight][:num_features]
+            fs = [
+                x[0] for x in local_weights_label
+                if x[1] > 0 and x[1] > min_weight
+            ][:num_features]
         if negative_only:
-            fs = [x[0] for x in local_weights_label
-                  if x[1] < 0 and abs(x[1]) > min_weight][:num_features]
+            fs = [
+                x[0] for x in local_weights_label
+                if x[1] < 0 and abs(x[1]) > min_weight
+            ][:num_features]
         if positive_only or negative_only:
+            c = 1 if positive_only else 0
             for f in fs:
-                temp[segments == f] = image[segments == f].copy()
+                temp[segments == f] = [0, 255, 0]
+                # temp[segments == f, c] = np.max(image)
                 mask[segments == f] = 1
             return temp, mask
         else:
@@ -330,8 +344,11 @@ class ImageInterpretation(object):
         temp = np.zeros_like(image)
 
         weight_max = abs(local_weights_label[0][1])
-        local_weights_label = [(f, w/weight_max) for f, w in local_weights_label]
-        local_weights_label = sorted(local_weights_label, key=lambda x: x[1], reverse=True)  # negatives are at last.
+        local_weights_label = [(f, w / weight_max)
+                               for f, w in local_weights_label]
+        local_weights_label = sorted(
+            local_weights_label, key=lambda x: x[1],
+            reverse=True)  # negatives are at last.
 
         cmaps = cm.get_cmap('Spectral')
         colors = cmaps(np.linspace(0, 1, len(local_weights_label)))
@@ -354,8 +371,12 @@ class LimeImageInterpreter(object):
     feature that is 1 when the value is the same as the instance being
     interpreted."""
 
-    def __init__(self, kernel_width=.25, kernel=None, verbose=False,
-                 feature_selection='auto', random_state=None):
+    def __init__(self,
+                 kernel_width=.25,
+                 kernel=None,
+                 verbose=False,
+                 feature_selection='auto',
+                 random_state=None):
         """Init function.
 
         Args:
@@ -377,22 +398,27 @@ class LimeImageInterpreter(object):
         kernel_width = float(kernel_width)
 
         if kernel is None:
+
             def kernel(d, kernel_width):
-                return np.sqrt(np.exp(-(d ** 2) / kernel_width ** 2))
+                return np.sqrt(np.exp(-(d**2) / kernel_width**2))
 
         kernel_fn = partial(kernel, kernel_width=kernel_width)
 
         self.random_state = check_random_state(random_state)
         self.feature_selection = feature_selection
-        self.base = LimeBase(kernel_fn, verbose, random_state=self.random_state)
+        self.base = LimeBase(
+            kernel_fn, verbose, random_state=self.random_state)
 
-    def interpret_instance(self, image, classifier_fn, labels=(1,),
+    def interpret_instance(self,
+                           image,
+                           classifier_fn,
+                           labels=(1, ),
                            hide_color=None,
-                           num_features=100000, num_samples=1000,
+                           num_features=100000,
+                           num_samples=1000,
                            batch_size=10,
                            distance_metric='cosine',
-                           model_regressor=None
-                           ):
+                           model_regressor=None):
         """Generates interpretations for a prediction.
 
         First, we generate neighborhood data by randomly perturbing features
@@ -435,6 +461,7 @@ class LimeImageInterpreter(object):
         self.segments = segments
 
         fudged_image = image.copy()
+        # global_mean = np.mean(image, (0, 1))
         if hide_color is None:
             # if no hide_color, use the mean
             for x in np.unique(segments):
@@ -461,24 +488,30 @@ class LimeImageInterpreter(object):
 
         top = labels
 
-        data, labels = self.data_labels(image, fudged_image, segments,
-                                        classifier_fn, num_samples,
-                                        batch_size=batch_size)
+        data, labels = self.data_labels(
+            image,
+            fudged_image,
+            segments,
+            classifier_fn,
+            num_samples,
+            batch_size=batch_size)
 
         distances = sklearn.metrics.pairwise_distances(
-            data,
-            data[0].reshape(1, -1),
-            metric=distance_metric
-        ).ravel()
+            data, data[0].reshape(1, -1), metric=distance_metric).ravel()
 
         interpretation_image = ImageInterpretation(image, segments)
         for label in top:
             (interpretation_image.intercept[label],
              interpretation_image.local_weights[label],
-             interpretation_image.score, interpretation_image.local_pred) = self.base.interpret_instance_with_data(
-                data, labels, distances, label, num_features,
-                model_regressor=model_regressor,
-                feature_selection=self.feature_selection)
+             interpretation_image.score, interpretation_image.local_pred
+             ) = self.base.interpret_instance_with_data(
+                 data,
+                 labels,
+                 distances,
+                 label,
+                 num_features,
+                 model_regressor=model_regressor,
+                 feature_selection=self.feature_selection)
         return interpretation_image
 
     def data_labels(self,
@@ -511,6 +544,9 @@ class LimeImageInterpreter(object):
         labels = []
         data[0, :] = 1
         imgs = []
+
+        logging.info("Computing LIME.", use_color=True)
+
         for row in tqdm.tqdm(data):
             temp = copy.deepcopy(image)
             zeros = np.where(row == 0)[0]
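
The loop continues beyond this excerpt, but the perturbation scheme is standard LIME: each row of `data` is a binary on/off mask over superpixels, masked-off segments are replaced with the fudged image, and batches are scored by `classifier_fn`. A self-contained sketch under those assumptions:

```python
import numpy as np

def perturb_and_score(image, fudged_image, segments, classifier_fn,
                      num_samples, batch_size=10):
    n_features = np.unique(segments).shape[0]
    # One random on/off mask over the superpixels per sample.
    data = np.random.randint(0, 2, (num_samples, n_features))
    data[0, :] = 1  # the first sample is the unperturbed image
    labels, batch = [], []
    for row in data:
        temp = image.copy()
        for off in np.where(row == 0)[0]:
            temp[segments == off] = fudged_image[segments == off]
        batch.append(temp)
        if len(batch) == batch_size:
            labels.extend(classifier_fn(np.array(batch)))
            batch = []
    if batch:
        labels.extend(classifier_fn(np.array(batch)))
    return data, np.array(labels)
```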

+ 219 - 38
paddlex/interpret/core/normlime_base.py

@@ -16,6 +16,7 @@ import os
 import os.path as osp
 import numpy as np
 import glob
+import tqdm
 
 from paddlex.interpret.as_data_reader.readers import read_image
 import paddlex.utils.logging as logging
@@ -38,18 +39,24 @@ def combine_normlime_and_lime(lime_weights, g_weights):
 
     for y in pred_labels:
         normlized_lime_weights_y = lime_weights[y]
-        lime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_lime_weights_y}
+        lime_weights_dict = {
+            tuple_w[0]: tuple_w[1]
+            for tuple_w in normlized_lime_weights_y
+        }
 
         normlized_g_weight_y = g_weights[y]
-        normlime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_g_weight_y}
+        normlime_weights_dict = {
+            tuple_w[0]: tuple_w[1]
+            for tuple_w in normlized_g_weight_y
+        }
 
         combined_weights[y] = [
             (seg_k, lime_weights_dict[seg_k] * normlime_weights_dict[seg_k])
             for seg_k in lime_weights_dict.keys()
         ]
 
-        combined_weights[y] = sorted(combined_weights[y],
-                                     key=lambda x: np.abs(x[1]), reverse=True)
+        combined_weights[y] = sorted(
+            combined_weights[y], key=lambda x: np.abs(x[1]), reverse=True)
 
     return combined_weights
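
In other words, a segment's combined score is the product of its local LIME weight and its global NormLIME weight, re-sorted by magnitude. A minimal sketch for a single label (using `.get` defensively, which the original does not):

```python
import numpy as np

def combine_for_label(lime_pairs, normlime_pairs):
    lime = dict(lime_pairs)        # {segment_id: local weight}
    glob = dict(normlime_pairs)    # {segment_id: global weight}
    combined = [(seg, lime[seg] * glob.get(seg, 0.0)) for seg in lime]
    return sorted(combined, key=lambda x: np.abs(x[1]), reverse=True)
```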
 
@@ -67,7 +74,8 @@ def centroid_using_superpixels(features, segments):
     regions = regionprops(segments + 1)
     one_list = np.zeros((len(np.unique(segments)), features.shape[2]))
     for i, r in enumerate(regions):
-        one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] + 0.5), :]
+        one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] +
+                                                             0.5), :]
     return one_list
 
 
@@ -80,30 +88,39 @@ def get_feature_for_kmeans(feature_map, segments):
     return x
 
 
-def precompute_normlime_weights(list_data_, predict_fn, num_samples=3000, batch_size=50, save_dir='./tmp'):
+def precompute_normlime_weights(list_data_,
+                                predict_fn,
+                                num_samples=3000,
+                                batch_size=50,
+                                save_dir='./tmp'):
     # save lime weights and kmeans cluster labels
-    precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir)
+    precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
+                            save_dir)
 
     # load precomputed results, compute normlime weights and save.
-    fname_list = glob.glob(os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples)))
+    fname_list = glob.glob(
+        os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples)))
     return compute_normlime_weights(fname_list, save_dir, num_samples)
 
 
-def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels, cluster_labels, save_path):
+def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels,
+                                           cluster_labels, save_path):
 
     lime_weights = {}
     for label in image_pred_labels:
         lime_weights[label] = lime_all_weights[label]
 
     for_normlime_weights = {
-        'lime_weights': lime_weights,  # a dict: class_label: (seg_label, weight)
+        'lime_weights':
+        lime_weights,  # a dict: class_label: (seg_label, weight)
         'cluster': cluster_labels  # a list with segments as indices.
     }
 
     np.save(save_path, for_normlime_weights)
 
 
-def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir):
+def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
+                            save_dir):
     root_path = gen_user_home()
     root_path = osp.join(root_path, '.paddlex')
     h_pre_models = osp.join(root_path, "pre_models")
@@ -117,17 +134,24 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav
 
     for data_index, each_data_ in enumerate(list_data_):
         if isinstance(each_data_, str):
-            save_path = "lime_weights_s{}_{}.npy".format(num_samples, each_data_.split('/')[-1].split('.')[0])
+            save_path = "lime_weights_s{}_{}.npy".format(
+                num_samples, each_data_.split('/')[-1].split('.')[0])
             save_path = os.path.join(save_dir, save_path)
         else:
-            save_path = "lime_weights_s{}_{}.npy".format(num_samples, data_index)
+            save_path = "lime_weights_s{}_{}.npy".format(num_samples,
+                                                         data_index)
             save_path = os.path.join(save_dir, save_path)
 
         if os.path.exists(save_path):
-            logging.info(save_path + ' exists, not computing this one.', use_color=True)
+            logging.info(
+                save_path + ' exists, not computing this one.', use_color=True)
             continue
-        img_file_name = each_data_ if isinstance(each_data_, str) else data_index
-        logging.info('processing '+ img_file_name + ' [{}/{}]'.format(data_index, len(list_data_)), use_color=True)
+        img_file_name = each_data_ if isinstance(each_data_,
+                                                 str) else str(data_index)
+        logging.info(
+            'processing ' + img_file_name + ' [{}/{}]'.format(data_index,
+                                                              len(list_data_)),
+            use_color=True)
 
         image_show = read_image(each_data_)
         result = predict_fn(image_show)
@@ -156,32 +180,38 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav
         pred_label = pred_label[:top_k]
 
         algo = lime_base.LimeImageInterpreter()
-        interpreter = algo.interpret_instance(image_show[0], predict_fn, pred_label, 0,
-                                          num_samples=num_samples, batch_size=batch_size)
-
-        X = get_feature_for_kmeans(compute_features_for_kmeans(image_show).transpose((1, 2, 0)), interpreter.segments)
+        interpreter = algo.interpret_instance(
+            image_show[0],
+            predict_fn,
+            pred_label,
+            0,
+            num_samples=num_samples,
+            batch_size=batch_size)
+
+        X = get_feature_for_kmeans(
+            compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
+            interpreter.segments)
         try:
             cluster_labels = kmeans_model.predict(X)
         except AttributeError:
             from sklearn.metrics import pairwise_distances_argmin_min
-            cluster_labels, _ = pairwise_distances_argmin_min(X, kmeans_model.cluster_centers_)
+            cluster_labels, _ = pairwise_distances_argmin_min(
+                X, kmeans_model.cluster_centers_)
         save_one_lime_predict_and_kmean_labels(
-            interpreter.local_weights, pred_label,
-            cluster_labels,
-            save_path
-        )
+            interpreter.local_weights, pred_label, cluster_labels, save_path)
 
 
 def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
     normlime_weights_all_labels = {}
-    
+
     for f in a_list_lime_fnames:
         try:
             lime_weights_and_cluster = np.load(f, allow_pickle=True).item()
             lime_weights = lime_weights_and_cluster['lime_weights']
             cluster = lime_weights_and_cluster['cluster']
         except:
-            logging.info('When loading precomputed LIME result, skipping' + str(f))
+            logging.info('When loading precomputed LIME result, skipping ' +
+                         str(f))
             continue
         logging.info('Loading precomputed LIME result, ' + str(f))
         pred_labels = lime_weights.keys()
@@ -203,10 +233,12 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
     for y in normlime_weights_all_labels:
         normlime_weights = normlime_weights_all_labels.get(y, {})
         for k in normlime_weights:
-            normlime_weights[k] = sum(normlime_weights[k]) / len(normlime_weights[k])
+            normlime_weights[k] = sum(normlime_weights[k]) / len(
+                normlime_weights[k])
 
     # check normlime
-    if len(normlime_weights_all_labels.keys()) < max(normlime_weights_all_labels.keys()) + 1:
+    if len(normlime_weights_all_labels.keys()) < max(
+            normlime_weights_all_labels.keys()) + 1:
         logging.info(
             "\n" + \
             "Warning: !!! \n" + \
@@ -218,17 +250,166 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
         )
 
     n = 0
-    f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n)
-    while os.path.exists(
-            os.path.join(save_dir, f_out)
-    ):
+    f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
+        lime_num_samples, len(a_list_lime_fnames), n)
+    while os.path.exists(os.path.join(save_dir, f_out)):
         n += 1
-        f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n)
+        f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
+            lime_num_samples, len(a_list_lime_fnames), n)
         continue
 
-    np.save(
-        os.path.join(save_dir, f_out),
-        normlime_weights_all_labels
-    )
+    np.save(os.path.join(save_dir, f_out), normlime_weights_all_labels)
     return os.path.join(save_dir, f_out)
 
+
+def precompute_global_classifier(dataset,
+                                 predict_fn,
+                                 save_path,
+                                 batch_size=50,
+                                 max_num_samples=1000):
+    from sklearn.linear_model import LogisticRegression
+
+    root_path = gen_user_home()
+    root_path = osp.join(root_path, '.paddlex')
+    h_pre_models = osp.join(root_path, "pre_models")
+    if not osp.exists(h_pre_models):
+        if not osp.exists(root_path):
+            os.makedirs(root_path)
+        url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
+        pdx.utils.download_and_decompress(url, path=root_path)
+    h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
+    kmeans_model = load_kmeans_model(h_pre_models_kmeans)
+
+    image_list = []
+    for item in dataset.file_list:
+        image_list.append(item[0])
+
+    x_data = []
+    y_labels = []
+
+    num_features = len(kmeans_model.cluster_centers_)
+
+    logging.info(
+        "Initialization for NormLIME: Computing each sample in the test list.",
+        use_color=True)
+
+    for each_data_ in tqdm.tqdm(image_list):
+        x_data_i = np.zeros((num_features))
+        image_show = read_image(each_data_)
+        result = predict_fn(image_show)
+        result = result[0]  # only one image here.
+        c = compute_features_for_kmeans(image_show).transpose((1, 2, 0))
+
+        segments = np.zeros((image_show.shape[1], image_show.shape[2]),
+                            np.int32)
+        num_blocks = 10
+        height_per_i = segments.shape[0] // num_blocks + 1
+        width_per_i = segments.shape[1] // num_blocks + 1
+
+        for i in range(segments.shape[0]):
+            for j in range(segments.shape[1]):
+                segments[i,
+                         j] = i // height_per_i * num_blocks + j // width_per_i
+
+        # segments = quickshift(image_show[0], sigma=1)
+        X = get_feature_for_kmeans(c, segments)
+
+        try:
+            cluster_labels = kmeans_model.predict(X)
+        except AttributeError:
+            from sklearn.metrics import pairwise_distances_argmin_min
+            cluster_labels, _ = pairwise_distances_argmin_min(
+                X, kmeans_model.cluster_centers_)
+
+        for c in cluster_labels:
+            x_data_i[c] = 1
+
+        # x_data_i /= len(cluster_labels)
+
+        pred_y_i = np.argmax(result)
+        y_labels.append(pred_y_i)
+        x_data.append(x_data_i)
+
+    if len(np.unique(y_labels)) < 2:
+        logging.info("Warning: The test samples in the dataset is limited.\n \
+                     NormLIME may have no effect on the results.\n \
+                     Try to add more test samples, or see the results of LIME.")
+        num_classes = np.max(np.unique(y_labels)) + 1
+        normlime_weights_all_labels = {}
+        for class_index in range(num_classes):
+            w = np.ones((num_features)) / num_features
+            normlime_weights_all_labels[class_index] = {
+                i: wi
+                for i, wi in enumerate(w)
+            }
+        logging.info("Saving the computed normlime_weights in {}".format(
+            save_path))
+
+        np.save(save_path, normlime_weights_all_labels)
+        return save_path
+
+    clf = LogisticRegression(multi_class='multinomial', max_iter=1000)
+    clf.fit(x_data, y_labels)
+
+    num_classes = np.max(np.unique(y_labels)) + 1
+    normlime_weights_all_labels = {}
+
+    if len(y_labels) / len(np.unique(y_labels)) < 3:
+        logging.info("Warning: The test samples in the dataset is limited.\n \
+                     NormLIME may have no effect on the results.\n \
+                     Try to add more test samples, or see the results of LIME.")
+
+    if len(np.unique(y_labels)) == 2:
+        # binary: clf.coef_ has shape of [1, num_features]
+        for class_index in range(num_classes):
+            if class_index not in clf.classes_:
+                w = np.ones((num_features)) / num_features
+                normlime_weights_all_labels[class_index] = {
+                    i: wi
+                    for i, wi in enumerate(w)
+                }
+                continue
+
+            if clf.classes_[0] == class_index:
+                w = -clf.coef_[0]
+            else:
+                w = clf.coef_[0]
+
+            # softmax
+            w = w - np.max(w)
+            exp_w = np.exp(w * 10)
+            w = exp_w / np.sum(exp_w)
+
+            normlime_weights_all_labels[class_index] = {
+                i: wi
+                for i, wi in enumerate(w)
+            }
+    else:
+        # clf.coef_ has shape of [len(np.unique(y_labels)), num_features]
+        for class_index in range(num_classes):
+            if class_index not in clf.classes_:
+                w = np.ones((num_features)) / num_features
+                normlime_weights_all_labels[class_index] = {
+                    i: wi
+                    for i, wi in enumerate(w)
+                }
+                continue
+
+            coef_class_index = np.where(clf.classes_ == class_index)[0][0]
+            w = clf.coef_[coef_class_index]
+
+            # softmax
+            w = w - np.max(w)
+            exp_w = np.exp(w * 10)
+            w = exp_w / np.sum(exp_w)
+
+            normlime_weights_all_labels[class_index] = {
+                i: wi
+                for i, wi in enumerate(w)
+            }
+
+    logging.info("Saving the computed normlime_weights in {}".format(
+        save_path))
+    np.save(save_path, normlime_weights_all_labels)
+
+    return save_path
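
Each per-class weight vector above is a logistic-regression coefficient row pushed through a temperature-scaled softmax (the factor of 10 corresponds to a temperature of 0.1), which sharpens the distribution over cluster features. The transform in isolation:

```python
import numpy as np

def sharpen(w, temperature=0.1):
    # Max-subtraction for numerical stability, then softmax at 1/temperature.
    w = w - np.max(w)
    exp_w = np.exp(w / temperature)
    return exp_w / np.sum(exp_w)
```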

+ 18 - 9
paddlex/interpret/interpretation_predict.py

@@ -13,17 +13,26 @@
 # limitations under the License.
 
 import numpy as np
+import cv2
+import copy
+
 
 def interpretation_predict(model, images):
-    model.arrange_transforms(
-            transforms=model.test_transforms, mode='test')
+    images = images.astype('float32')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
+    tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
+    model.test_transforms.transforms = model.test_transforms.transforms[-2:]
+
     new_imgs = []
     for i in range(images.shape[0]):
-        img = images[i]
-        new_imgs.append(model.test_transforms(img)[0])
+        images[i] = cv2.cvtColor(images[i], cv2.COLOR_RGB2BGR)
+        new_imgs.append(model.test_transforms(images[i])[0])
+
     new_imgs = np.array(new_imgs)
-    result = model.exe.run(
-        model.test_prog,
-        feed={'image': new_imgs},
-        fetch_list=list(model.interpretation_feats.values()))
-    return result
+    out = model.exe.run(model.test_prog,
+                        feed={'image': new_imgs},
+                        fetch_list=list(model.interpretation_feats.values()))
+
+    model.test_transforms.transforms = tmp_transforms
+
+    return out
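
Because only the last two test transforms are applied (assumed here to be normalization and HWC-to-CHW conversion), the inputs must already be decoded, resized float32 RGB images. A hypothetical call:

```python
import numpy as np
from paddlex.interpret.interpretation_predict import interpretation_predict

# `model` is a loaded PaddleX classifier; `imgs` is a float32 batch of
# RGB images shaped (N, H, W, 3), already resized to the model input size.
imgs = np.random.rand(4, 224, 224, 3).astype('float32') * 255
probs = interpretation_predict(model, imgs)[0]  # (N, num_classes)
```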

+ 85 - 88
paddlex/interpret/visualize.py

@@ -20,79 +20,79 @@ import numpy as np
 import paddlex as pdx
 from .interpretation_predict import interpretation_predict
 from .core.interpretation import Interpretation
-from .core.normlime_base import precompute_normlime_weights
+from .core.normlime_base import precompute_global_classifier
 from .core._session_preparation import gen_user_home
-   
-def lime(img_file, 
-         model, 
-         num_samples=3000, 
-         batch_size=50,
-         save_dir='./'):
-    """使用LIME算法将模型预测结果的可解释性可视化。 
-    
+
+
+def lime(img_file, model, num_samples=3000, batch_size=50, save_dir='./'):
+    """使用LIME算法将模型预测结果的可解释性可视化。
+
     LIME stands for local, model-agnostic interpretability, and can explain any model. The idea of
     LIME is to sample randomly in the space around the input instance, run each sample through the
     original model to get a new output, and fit the resulting inputs and outputs with a simple,
-    interpretable model (e.g. a linear regression) whose per-dimension weights explain the model.  
-    
+    interpretable model (e.g. a linear regression) whose per-dimension weights explain the model.
+
     Note: LIME interpretation visualization currently supports classification models only.
-         
+
     Args:
         img_file (str): path of the image to interpret.
         model (paddlex.cv.models): a PaddleX model.
         num_samples (int): number of samples LIME draws to learn the linear model. Default is 3000.
         batch_size (int): batch size for prediction. Default is 50.
-        save_dir (str): directory for the visualization results (saved as PNG files) and intermediate files.        
+        save_dir (str): directory for the visualization results (saved as PNG files) and intermediate files.
     """
     assert model.model_type == 'classifier', \
         'Interpretation visualization is currently only supported for classifiers!'
     if model.status != 'Normal':
-        raise Exception('The interpretation only can deal with the Normal model')
+        raise Exception(
+            'The interpretation can only deal with models in Normal status')
     if not osp.exists(save_dir):
         os.makedirs(save_dir)
-    model.arrange_transforms(
-                transforms=model.test_transforms, mode='test')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
     tmp_transforms = copy.deepcopy(model.test_transforms)
     tmp_transforms.transforms = tmp_transforms.transforms[:-2]
     img = tmp_transforms(img_file)[0]
     img = np.around(img).astype('uint8')
     img = np.expand_dims(img, axis=0)
     interpreter = None
-    interpreter = get_lime_interpreter(img, model, num_samples=num_samples, batch_size=batch_size)
+    interpreter = get_lime_interpreter(
+        img, model, num_samples=num_samples, batch_size=batch_size)
     img_name = osp.splitext(osp.split(img_file)[-1])[0]
-    interpreter.interpret(img, save_dir=save_dir)
-    
-    
-def normlime(img_file, 
-              model, 
-              dataset=None,
-              num_samples=3000, 
-              batch_size=50,
-              save_dir='./'):
+    interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
+
+
+def normlime(img_file,
+             model,
+             dataset=None,
+             num_samples=3000,
+             batch_size=50,
+             save_dir='./',
+             normlime_weights_file=None):
     """使用NormLIME算法将模型预测结果的可解释性可视化。
-    
+
     NormLIME uses a number of samples to derive a global explanation: it precomputes the LIME results
     of a set of test samples, then normalizes the weights of the same features across samples to obtain a global input-output relationship.
-    
+
     Note 1: dataset reads in a dataset; it should not be too large (otherwise computation takes long), but it should contain data of every class.
     Note 2: NormLIME interpretation visualization currently supports classification models only.
-         
+
     Args:
         img_file (str): path of the image to interpret.
         model (paddlex.cv.models): a PaddleX model.
         dataset (paddlex.datasets): dataset reader. Default is None.
         num_samples (int): number of samples LIME draws to learn the linear model. Default is 3000.
         batch_size (int): batch size for prediction. Default is 50.
-        save_dir (str): directory for the visualization results (saved as PNG files) and intermediate files.        
+        save_dir (str): directory for the visualization results (saved as PNG files) and intermediate files.
+        normlime_weights_file (str): filename of the NormLIME initialization file. If it does not exist, the weights are computed once and saved to this path; if it exists, it is loaded directly.
     """
     assert model.model_type == 'classifier', \
         'Interpretation visualization is currently only supported for classifiers!'
     if model.status != 'Normal':
-        raise Exception('The interpretation only can deal with the Normal model')
+        raise Exception(
+            'The interpretation can only deal with models in Normal status')
     if not osp.exists(save_dir):
         os.makedirs(save_dir)
-    model.arrange_transforms(
-                transforms=model.test_transforms, mode='test')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
     tmp_transforms = copy.deepcopy(model.test_transforms)
     tmp_transforms.transforms = tmp_transforms.transforms[:-2]
     img = tmp_transforms(img_file)[0]
@@ -100,52 +100,48 @@ def normlime(img_file,
     img = np.expand_dims(img, axis=0)
     interpreter = None
     if dataset is None:
-        raise Exception('The dataset is None. Cannot implement this kind of interpretation')
-    interpreter = get_normlime_interpreter(img, model, dataset, 
-                                 num_samples=num_samples, batch_size=batch_size,
-                                     save_dir=save_dir)
+        raise Exception(
+            'The dataset is None. Cannot perform this kind of interpretation')
+    interpreter = get_normlime_interpreter(
+        img,
+        model,
+        dataset,
+        num_samples=num_samples,
+        batch_size=batch_size,
+        save_dir=save_dir,
+        normlime_weights_file=normlime_weights_file)
     img_name = osp.splitext(osp.split(img_file)[-1])[0]
-    interpreter.interpret(img, save_dir=save_dir)
-    
-    
+    interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
+
+
 def get_lime_interpreter(img, model, num_samples=3000, batch_size=50):
     def predict_func(image):
-        image = image.astype('float32')
-        for i in range(image.shape[0]):
-            image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR)
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
         out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
         return out[0]
+
     labels_name = None
     if hasattr(model, 'labels'):
         labels_name = model.labels
-    interpreter = Interpretation('lime', 
-                            predict_func,
-                            labels_name,
-                            num_samples=num_samples, 
-                            batch_size=batch_size)
+    interpreter = Interpretation(
+        'lime',
+        predict_func,
+        labels_name,
+        num_samples=num_samples,
+        batch_size=batch_size)
     return interpreter
 
 
-def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=50, save_dir='./'):
-    def precompute_predict_func(image):
-        image = image.astype('float32')
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
-        out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
-        return out[0]
+def get_normlime_interpreter(img,
+                             model,
+                             dataset,
+                             num_samples=3000,
+                             batch_size=50,
+                             save_dir='./',
+                             normlime_weights_file=None):
     def predict_func(image):
-        image = image.astype('float32')
-        for i in range(image.shape[0]):
-            image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR)
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
         out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
         return out[0]
+
     labels_name = None
     if dataset is not None:
         labels_name = dataset.labels
@@ -157,28 +153,29 @@ def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=5
             os.makedirs(root_path)
         url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
         pdx.utils.download_and_decompress(url, path=root_path)
-    npy_dir = precompute_for_normlime(precompute_predict_func, 
-                                      dataset, 
-                                      num_samples=num_samples, 
-                                      batch_size=batch_size,
-                                      save_dir=save_dir)
-    interpreter = Interpretation('normlime', 
-                            predict_func,
-                            labels_name,
-                            num_samples=num_samples, 
-                            batch_size=batch_size,
-                            normlime_weights=npy_dir)
-    return interpreter
-
 
-def precompute_for_normlime(predict_func, dataset, num_samples=3000, batch_size=50, save_dir='./'):
-    image_list = []
-    for item in dataset.file_list:
-        image_list.append(item[0])
-    return precompute_normlime_weights(
-            image_list,  
+    if osp.exists(osp.join(save_dir, normlime_weights_file)):
+        normlime_weights_file = osp.join(save_dir, normlime_weights_file)
+        try:
+            np.load(normlime_weights_file, allow_pickle=True).item()
+        except:
+            normlime_weights_file = precompute_global_classifier(
+                dataset,
+                predict_func,
+                save_path=normlime_weights_file,
+                batch_size=batch_size)
+    else:
+        normlime_weights_file = precompute_global_classifier(
+            dataset,
             predict_func,
-            num_samples=num_samples, 
-            batch_size=batch_size,
-            save_dir=save_dir)
-  
+            save_path=osp.join(save_dir, normlime_weights_file),
+            batch_size=batch_size)
+
+    interpreter = Interpretation(
+        'normlime',
+        predict_func,
+        labels_name,
+        num_samples=num_samples,
+        batch_size=batch_size,
+        normlime_weights=normlime_weights_file)
+    return interpreter
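
The branching above makes `normlime_weights_file` behave as a cache: if the file exists under `save_dir` and loads cleanly it is reused, otherwise the global classifier is recomputed and saved there. A hypothetical call (the full tutorial appears at the end of this diff):

```python
import paddlex as pdx

# Paths and names here are placeholders; `test_dataset` is a previously
# constructed paddlex.datasets reader covering every class.
model = pdx.load_model('output/best_model')
pdx.interpret.normlime(
    'images/example.jpg',
    model,
    dataset=test_dataset,
    save_dir='./interpret',
    normlime_weights_file='normlime_weights.npy')  # computed once, reused
```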

+ 1 - 1
paddlex/slim.py

@@ -31,4 +31,4 @@ def export_quant_model(model,
         batch_size=batch_size,
         batch_num=batch_num,
         save_dir=save_dir,
-        cache_dir='./temp')
+        cache_dir=cache_dir)

+ 2 - 2
paddlex/tools/x2coco.py

@@ -110,7 +110,7 @@ class LabelMe2COCO(X2COCO):
         annotation["segmentation"] = [list(np.asarray(points).flatten())]
         annotation["iscrowd"] = 0
         annotation["image_id"] = image_id + 1
-        annotation["bbox"] = list(map(float, get_bbox(height, width, points)))
+        annotation["bbox"] = list(map(float, self.get_bbox(height, width, points)))
         annotation["area"] = annotation["bbox"][2] * annotation["bbox"][3]
         annotation["category_id"] = label_to_num[label]
         annotation["id"] = object_id + 1
@@ -254,4 +254,4 @@ class EasyData2COCO(X2COCO):
                                 segmentation.append(contour_list)
                         self.annotations_list.append(
                             self.generate_polygon_anns_field(points, segmentation, label, image_id, object_id,
-                                                label_to_num))
+                                                label_to_num))

+ 8 - 8
paddlex/utils/logging.py

@@ -29,13 +29,11 @@ def log(level=2, message="", use_color=False):
     current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
     if paddlex.log_level >= level:
         if use_color:
-            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(
-                current_time, levels[level],
-                message).encode("utf-8").decode("latin1"))
+            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(current_time, levels[
+                level], message).encode("utf-8").decode("latin1"))
         else:
-            print(
-                "{} [{}]\t{}".format(current_time, levels[level],
-                                     message).encode("utf-8").decode("latin1"))
+            print("{} [{}]\t{}".format(current_time, levels[level], message)
+                  .encode("utf-8").decode("latin1"))
         sys.stdout.flush()
 
 
@@ -47,9 +45,11 @@ def info(message="", use_color=False):
     log(level=2, message=message, use_color=use_color)
 
 
-def warning(message="", use_color=False):
+def warning(message="", use_color=True):
     log(level=1, message=message, use_color=use_color)
 
 
-def error(message="", use_color=False):
+def error(message="", use_color=True, exit=True):
     log(level=0, message=message, use_color=use_color)
+    if exit:
+        sys.exit(-1)
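
With these defaults, warnings and errors print in color, and `error` now terminates the process unless told otherwise. A short usage sketch:

```python
import paddlex.utils.logging as logging

logging.warning("pretrained weights not found, using random init")  # colored
logging.error("invalid dataset path", exit=False)  # log only, keep running
logging.error("cannot continue")  # logs, then sys.exit(-1)
```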

+ 1 - 1
setup.py

@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit
 
 setuptools.setup(
     name="paddlex",
-    version='1.0.4',
+    version='1.0.6',
     author="paddlex",
     author_email="paddlex@baidu.com",
     description=long_description,

+ 12 - 8
tutorials/interpret/normlime.py

@@ -14,18 +14,22 @@ model_file = 'https://bj.bcebos.com/paddlex/interpret/mini_imagenet_veg_mobilene
 pdx.utils.download_and_decompress(model_file, path='./')
 
 # Load the model
-model = pdx.load_model('mini_imagenet_veg_mobilenetv2')
+model_file = 'mini_imagenet_veg_mobilenetv2'
+model = pdx.load_model(model_file)
 
 # Define the dataset used for testing
+dataset = 'mini_imagenet_veg'
 test_dataset = pdx.datasets.ImageNet(
-    data_dir='mini_imagenet_veg',
-    file_list=osp.join('mini_imagenet_veg', 'test_list.txt'),
-    label_list=osp.join('mini_imagenet_veg', 'labels.txt'),
+    data_dir=dataset,
+    file_list=osp.join(dataset, 'test_list.txt'),
+    label_list=osp.join(dataset, 'labels.txt'),
     transforms=model.test_transforms)
 
 # Interpretation visualization
 pdx.interpret.normlime(
-         'mini_imagenet_veg/mushroom/n07734744_1106.JPEG', 
-          model, 
-          test_dataset, 
-          save_dir='./')
+    test_dataset.file_list[0][0],
+    model,
+    test_dataset,
+    save_dir='./',
+    normlime_weights_file='{}_{}.npy'.format(
+        dataset.split('/')[-1], model.model_name))