
Merge pull request #8 from PaddlePaddle/develop

SunAhong1993 5 years ago
parent
commit
02a748747f

+ 1 - 1
.pre-commit-config.yaml

@@ -35,6 +35,6 @@
     -   id: cpplint-cpp-source
         name: cpplint
         description: Check C++ code style using cpplint.py.
-        entry: bash cpplint_pre_commit.hook
+        entry: bash ./tools/codestyle/cpplint_pre_commit.hook
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$

+ 2 - 1
deploy/cpp/include/paddlex/results.h

@@ -63,9 +63,10 @@ class SegResult : public BaseResult {
  public:
   Mask<int64_t> label_map;
   Mask<float> score_map;
+  std::string type = "seg";
   void clear() {
     label_map.clear();
     score_map.clear();
   }
 };
-}  // namespce of PaddleX
+}  // namespace PaddleX

+ 9 - 0
deploy/cpp/src/paddlex.cpp

@@ -65,6 +65,15 @@ bool Model::load_config(const std::string& model_dir) {
   YAML::Node config = YAML::LoadFile(yaml_file);
   type = config["_Attributes"]["model_type"].as<std::string>();
   name = config["Model"].as<std::string>();
+  std::string version = config["version"].as<std::string>();
+  if (version[0] == '0') {
+    std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, so it "
+              << "cannot be deployed. Please refer to "
+              << "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/upgrade_version.md "
+              << "to upgrade the model version."
+              << std::endl;
+    return false;
+  }
   bool to_rgb = true;
   if (config["TransformsMode"].IsDefined()) {
     std::string mode = config["TransformsMode"].as<std::string>();

+ 8 - 8
deploy/lite/export_lite.py

@@ -19,30 +19,30 @@ import argparse
 
 def export_lite():
     opt = lite.Opt()
-    model_file = os.path.join(FLAGS.model_path, '__model__')
-    params_file = os.path.join(FLAGS.model_path, '__params__')
-    opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_dir)
+    model_file = os.path.join(FLAGS.model_dir, '__model__')
+    params_file = os.path.join(FLAGS.model_dir, '__params__')
+    opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_file)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
-        "--model_path",
+        "--model_dir",
         type=str,
         default="",
-        help="model path.",
+        help="path of '__model__' and '__params__'.",
         required=True)
     parser.add_argument(
         "--place",
         type=str,
         default="arm",
-        help="preprocess config path.",
+        help="run place: 'arm|opencl|x86|npu|xpu|rknpu|apu'.",
         required=True)
     parser.add_argument(
-        "--save_dir",
+        "--save_file",
         type=str,
         default="paddlex.onnx",
-        help="Directory for storing the output visualization files.",
+        help="file name for storing the output files.",
         required=True)
     FLAGS = parser.parse_args()
     export_lite()

+ 3 - 2
docs/apis/deploy.md

@@ -1,6 +1,6 @@
-# Predictor deployment - paddlex.deploy
+# Prediction deployment - paddlex.deploy
 
-Use AnalysisPredictor for prediction deployment.
+Use Paddle Inference for high-performance Python prediction deployment. For more information on Paddle Inference, see the [Paddle Inference docs](https://paddle-inference.readthedocs.io/en/latest/#)
 
 ## Predictor class
 
@@ -22,6 +22,7 @@ paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, use_
 > >
 > > ```
 > > import paddlex
+> >
 > > model = paddlex.deploy.Predictor(model_dir, use_gpu=True)
 > > result = model.predict(image_file)
 > > ```

+ 1 - 1
docs/apis/transforms/augment.md

@@ -9,7 +9,7 @@ PaddleX ships built-in data augmentation operations for image classification, object detection, instance segmentation, and semantic segmentation
 | Task type | Augmentation methods |
 | :------- | :------------|
 | Image classification | [RandomCrop](cls_transforms.html#randomcrop)、[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](cls_transforms.html#randomverticalflip)、 <br> [RandomRotate](cls_transforms.html#randomratate)、 [RandomDistort](cls_transforms.html#randomdistort) |
-|Object detection<br>Instance segmentation| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、<br> [[MixupImage](det_transforms.html#mixupimage)(YOLOv3 only)、RandomExpand](det_transforms.html#randomexpand) |
+|Object detection<br>Instance segmentation| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、<br> [MixupImage](det_transforms.html#mixupimage)(YOLOv3 only)、[RandomExpand](det_transforms.html#randomexpand) |
 |Semantic segmentation | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、<br> [RandomRotation](seg_transforms.html#randomrotation)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) |
 
 ## Support for the imgaug augmentation library

+ 2 - 2
docs/apis/transforms/cls_transforms.md

@@ -15,7 +15,7 @@ paddlex.cls.transforms.Compose(transforms)
 
 ## RandomCrop class
 ```python
-paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3. / 4, upper_ratio=4. / 3)
+paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. / 3)
 ```
 
 Randomly crops the image; a data augmentation operation used during model training.
@@ -26,7 +26,7 @@ paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3
 
 ### Parameters
 * **crop_size** (int): Target edge length after the random crop is resized. Default: 224.
-* **lower_scale** (float): Lower bound on the ratio of the cropped area to the original area. Default: 0.88.
+* **lower_scale** (float): Lower bound on the ratio of the cropped area to the original area. Default: 0.08.
 * **lower_ratio** (float): Lower bound on the aspect-ratio variation. Default: 3. / 4.
 * **upper_ratio** (float): Upper bound on the aspect-ratio variation. Default: 4. / 3.
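
A minimal usage sketch of the corrected default, assuming `RandomHorizontalFlip` and `Normalize` are available as listed in the augmentation docs (composition via the `Compose` API above):

```python
from paddlex.cls import transforms

# With lower_scale=0.08 (the new default), a crop may cover as little as
# 8% of the original image area before being resized to 224x224.
train_transforms = transforms.Compose([
    transforms.RandomCrop(crop_size=224, lower_scale=0.08),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(),
])
```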
 

+ 1 - 0
docs/appendix/index.rst

@@ -8,6 +8,7 @@
 
    model_zoo.md
    metrics.md
+   interpret.md
    parameters.md
    how_to_convert_dataset.md
    datasets.md

+ 31 - 0
docs/appendix/interpret.md

@@ -0,0 +1,31 @@
+# PaddleX Interpretability
+
+Deep learning models share a common problem: prediction is still a black box, and it is nearly impossible to perceive a model's internal state, so the credibility of its predictions is constantly questioned. To address this, PaddleX provides two algorithms for interpreting image classification predictions: LIME and NormLIME.
+
+## LIME
+LIME (Local Interpretable Model-agnostic Explanations) provides model-agnostic, local interpretability. Its main steps are:
+1. Compute the superpixels of the image.  
+2. Sample randomly in the neighborhood of the input: each sample randomly masks some of the image's superpixels (each sample's weight is inversely proportional to its distance from the original sample).  
+3. Run each sample through the prediction model to obtain a new output, yielding a series of inputs `X` and corresponding outputs `Y`.  
+4. Convert `X` into superpixel features `F`, and fit the mapping between `F` and `Y` with a simple, interpretable model `Model` (ridge regression is used here).  
+5. `Model` yields a weight for each input dimension of `F` (each dimension corresponds to one superpixel), which is used to explain the model.  
+
+For how to use LIME, see the [code example](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/lime.py) and the [API reference](../apis/visualize.html#lime). The `num_samples` parameter is particularly important: it is the number of random samples drawn in step 2 above; too small a value hurts the stability of the interpretation, while too large a value makes step 3 take a long time. The `batch_size` parameter is the prediction batch size used in step 3; too small a value slows step 3 down, and its upper bound depends on your machine. A minimal sketch of steps 2-5 appears below, after this file's diff.  
+
+The final LIME visualization looks like this:  
+![](../images/lime.png)  
+Green regions mark superpixels that contribute positively and red regions mark superpixels that contribute negatively; "First n superpixels" shows the n superpixels with the largest weights (computed in step 5 above).
+
+
+## NormLIME
+NormLIME is an improvement on LIME. A LIME explanation is local and specific to the current sample, whereas NormLIME uses a number of samples to build a global explanation for the current sample, with some denoising effect. Its steps are:  
+1. Download the KMeans model parameters and the first three layers of the ResNet50_vc network. (The ResNet50_vc parameters come from a network trained on ImageNet; using ImageNet images as the dataset, the average feature at each superpixel location and the feature at its centroid are extracted from ResNet50_vc's third-layer output for every image, and these train the KMeans model used here.)  
+2. Compute the LIME result for every image in the test set. (If there is no test set, the validation set can be used instead.)  
+3. Cluster all superpixels of all images with the KMeans model.  
+4. Normalize the weights of superpixels within the same cluster (i.e. sharing the same features) to obtain a weight per superpixel, which is used to explain the model.  
+
+For how to use NormLIME, see the [code example](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py) and the [API reference](../apis/visualize.html#normlime). As with LIME, `num_samples` is the number of random samples drawn in step 2 and is particularly important, and `batch_size` is the prediction batch size used in step 3 (too small a value slows step 3 down, with the upper bound set by your machine); `dataset` is the data built from the test or validation set. A sketch of the aggregation in steps 3-4 also follows this file's diff.  
+
+The final NormLIME visualization looks like this:  
+![](../images/normlime.png)  
+Green regions mark superpixels that contribute positively and red regions mark superpixels that contribute negatively; "First n superpixels" shows the n superpixels with the largest weights (computed in step 4 above). The last row shows the result of multiplying the corresponding LIME and NormLIME superpixel weights.
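
A minimal sketch of the LIME loop in steps 2-5 above, assuming hypothetical inputs: `segments` assigns a superpixel id to every pixel and `predict_proba` is the black-box classifier (an illustration, not the PaddleX implementation):

```python
import numpy as np
from sklearn.linear_model import Ridge

def lime_explain(image, segments, predict_proba, label, num_samples=1000):
    n_sp = int(segments.max()) + 1
    # Step 2: random binary masks over superpixels; row 0 keeps the original.
    X = np.random.randint(0, 2, (num_samples, n_sp))
    X[0, :] = 1
    batch = []
    for row in X:
        img = image.copy()
        img[~np.isin(segments, np.flatnonzero(row))] = 0  # mask off superpixels
        batch.append(img)
    # Step 3: query the black-box model for the class of interest.
    Y = predict_proba(np.stack(batch))[:, label]
    # Samples closer to the original (fewer masked superpixels) weigh more.
    dist = np.sqrt(((X - X[0]) ** 2).sum(axis=1))
    weights = np.exp(-dist / n_sp)
    # Steps 4-5: fit an interpretable ridge model; each coefficient is the
    # contribution of one superpixel.
    model = Ridge(alpha=1.0).fit(X, Y, sample_weight=weights)
    return model.coef_
```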

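And a sketch of the NormLIME aggregation in steps 3-4 above, assuming each image already has per-superpixel LIME weights plus a KMeans cluster id per superpixel (again an illustration, not the PaddleX implementation):

```python
from collections import defaultdict

def normlime_weights(per_image_results):
    # per_image_results: iterable of (cluster_ids, lime_weights) pairs,
    # aligned by superpixel index within each image.
    sums = defaultdict(float)
    counts = defaultdict(int)
    for cluster_ids, weights in per_image_results:
        for c, w in zip(cluster_ids, weights):
            sums[c] += abs(w)   # pool evidence per cluster across images
            counts[c] += 1
    # Step 4: normalize within each cluster of similar superpixels.
    return {c: sums[c] / counts[c] for c in sums}
```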
BIN
docs/images/lime.png


BIN
docs/images/normlime.png


+ 1 - 1
docs/index.rst

@@ -32,7 +32,7 @@ PaddleX is a full-process deep learning development tool built on PaddlePaddle's core framework, development kits, and tool components
 
 * PaddleX version: v1.0.0
 * Project website: http://www.paddlepaddle.org.cn/paddle/paddlex  
-* Project GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop  
+* Project GitHub: https://github.com/PaddlePaddle/PaddleX
 * Official QQ user group: 1045148026  
 * GitHub issue feedback: http://www.github.com/PaddlePaddle/PaddleX/issues
 

+ 2 - 2
docs/quick_start.md

@@ -61,7 +61,7 @@ eval_dataset = pdx.datasets.ImageNet(
 This document uses Baidu's MobileNetV3 pretrained model obtained through distillation; its architecture is identical to MobileNetV3 but with higher accuracy. PaddleX ships more than 20 built-in classification models; see the [PaddleX Model Zoo](appendix/model_zoo.md) for more.
 ```
 num_classes = len(train_dataset.labels)
-model.pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes)
+model = pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes)
 ```
 
 ### 3.4 Define Training Parameters
@@ -86,7 +86,7 @@ python train.py
 ## 5. Monitoring Training Metrics During Training
 During training, all iteration information is written to the terminal as a standard output stream. You can also view the training metrics visually with VisualDL: start visualdl as shown below, then open https://0.0.0.0:8001 (or https://localhost:8001) in a browser.
 ```
-visualdl --logdir output/mobilenetv2/vdl_log --port 8000
+visualdl --logdir output/mobilenetv2/vdl_log --port 8001
 ```
 ![](./images/vdl1.jpg)
 

+ 1 - 5
docs/tutorials/dataset_prepare.md

@@ -1,7 +1,3 @@
 # Data Preparation
 
-## Data Annotation
-
-## Support for Mainstream Annotation Tools
-
-## EasyData Annotation Support
+This content has been moved to the [appendix](../appendix/datasets.md)

+ 16 - 2
docs/tutorials/deploy/deploy_lite.md

@@ -1,5 +1,12 @@
 # Mobile Deployment
 
+PaddleX mobile deployment is powered by PaddleLite. The workflow is: first export the trained model as an inference model, then optimize the model with PaddleLite's Python interface, and finally deploy it with the PaddleLite inference library.
For a detailed introduction to PaddleLite and its usage, see the [PaddleLite docs](https://paddle-lite.readthedocs.io/zh/latest/)
+
+> PaddleX --> Inference Model --> PaddleLite Opt --> PaddleLite Inference
+
+The following shows how to export a PaddleX model as an inference model and then optimize it with PaddleLite's OPT module:
+
 step 1: Install PaddleLite
 
 ```
@@ -9,14 +16,21 @@ pip install paddlelite
 step 2: Export the PaddleX model as an inference model
 
 Export the model in inference format by following [Export inference model](deploy_server/deploy_python.html#inference).
+**Note: because the PaddleX codebase is continuously updated, models with a version below 1.0.0 cannot currently be used for prediction deployment directly; see [Model version upgrade](./upgrade_version.md) to upgrade the model version.**
 
 step 3: Convert the inference model into a PaddleLite model
 
 ```
-python /path/to/PaddleX/deploy/lite/export_lite.py --model_path /path/to/inference_model --save_dir /path/to/onnx_model
+python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/onnx_model --place place/to/run
+
 ```
 
-`--model_path` specifies the path of the inference model, and `--save_dir` specifies where the Lite model is saved.
+|  Parameter   | Description  |
+|  ----  | ----  |
+| model_dir  | Path to the inference model, containing the "__model__" and "__params__" files |
+| save_file  | Name of the output model file, defaults to "paddlex.nb" |
+| place  | Target platform, one of: arm\|opencl\|x86\|npu\|xpu\|rknpu\|apu |
+
 
 step 4: Prediction
 
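A concrete, hypothetical invocation of the step 3 conversion above (paths are made up; the `--save_file` and `--place` values follow the parameter table):

```
python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir ./inference_model --save_file ./paddlex.nb --place arm
```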

+ 2 - 1
docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md

@@ -104,7 +104,8 @@ make
 
 ### Step5: Prediction and Visualization
 
-Export the model in inference format by following [Export inference model](../deploy_python.html#inference).
+Export the model in inference format by following [Export inference model](../../deploy_python.html#inference).
+**Note: because the PaddleX codebase is continuously updated, models with a version below 1.0.0 cannot currently be used for prediction deployment directly; see [Model version upgrade](../../upgrade_version.md) to upgrade the model version.**
 
 After a successful build, the prediction demo executables are `build/demo/detector`, `build/demo/classifer`, and `build/demo/segmenter`; choose the one that matches your model type. Their main command-line parameters are described below:
 

+ 1 - 0
docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md

@@ -100,6 +100,7 @@ Prebuilt PaddlePaddle C++ inference libraries are provided for different `CPU`, `CUDA`, and whether Tens
 ### Step5: Prediction and Visualization
 
 Export the model in inference format by following [Export inference model](../deploy_python.html#inference).
+**Note: because the PaddleX codebase is continuously updated, models with a version below 1.0.0 cannot currently be used for prediction deployment directly; see [Model version upgrade](../../upgrade_version.md) to upgrade the model version.**
 
 The executables produced by the `Visual Studio 2019` build above are under `out\build\x64-Release`; open `cmd` and switch to that directory:
 

+ 2 - 0
docs/tutorials/deploy/deploy_server/deploy_python.md

@@ -20,6 +20,8 @@ paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./infer
 ```
 
 ## Prediction Deployment
+**Note: because the PaddleX codebase is continuously updated, models with a version below 1.0.0 cannot currently be used for prediction deployment directly; see [Model version upgrade](../upgrade_version.md) to upgrade the model version.**
+
 > Click to download the test image [xiaoduxiong_test_image.tar.gz](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_test_image.tar.gz)
 
 ```

+ 1 - 1
docs/tutorials/deploy/deploy_server/encryption.md

@@ -61,7 +61,7 @@ paddlex-encryption
 ./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
 ```
 
-`-model_dir` specifies the inference model path (export the model in inference format by following [Export inference model](deploy_python.html#inference)); you can use the `inference_model` exported in [Export the Xiaoduxiong detection model](deploy_python.html#inference). After encryption, the encrypted model is saved under the specified `-save_dir`, containing the three files `__model__.encrypted`, `__params__.encrypted` and `model.yml`, and key information is generated at the same time. The command output is shown in the figure below; the key is `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
+`-model_dir` specifies the inference model path (export the model in inference format by following [Export inference model](deploy_python.html#inference)); you can use the `inference_model` exported in [Export the Xiaoduxiong detection model](deploy_python.html#inference) (**Note**: because the PaddleX codebase is continuously updated, models with a version below 1.0.0 cannot currently be used for prediction deployment directly; see [Model version upgrade](../upgrade_version.md) to upgrade the model version.). After encryption, the encrypted model is saved under the specified `-save_dir`, containing the three files `__model__.encrypted`, `__params__.encrypted` and `model.yml`, and key information is generated at the same time. The command output is shown in the figure below; the key is `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
 
 ![](../images/encrypt.png)
 

+ 14 - 0
docs/tutorials/deploy/upgrade_version.md

@@ -0,0 +1,14 @@
+# Model Version Upgrade
+
+Because the PaddleX codebase is continuously updated, models with a version below 1.0.0 cannot currently be used for prediction deployment directly. Convert the model version with the following steps; the converted model can then be deployed on all supported targets.
+
+## Check the model version
+
+The folder holding the model contains a `model.yml` file; the `version` value on its last line is the model's version number. If the version is below 1.0.0, conversion is required; if it is 1.0.0 or higher, no conversion is needed.
+
+## Version conversion
+
+```
+paddlex --export_inference --model_dir=/path/to/low_version_model --save_dir=/path/to/high_version_model
+```
+`--model_dir` is the path of the model whose version is below 1.0.0; it can be a model saved during PaddleX training or a model already exported in inference format. `--save_dir` is where the upgraded model is saved, ready for multi-platform deployment.
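
As a purely hypothetical illustration of the check described above (the value is made up), the tail of a pre-1.0.0 `model.yml` would end with a line like:

```
version: 0.1.7
```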

+ 1 - 1
paddlex/cv/datasets/voc.py

@@ -113,7 +113,7 @@ class VOCDetection(Dataset):
                 is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
                 difficult = np.zeros((len(objs), 1), dtype=np.int32)
                 for i, obj in enumerate(objs):
-                    cname = obj.find('name').text
+                    cname = obj.find('name').text.strip()
                     gt_class[i][0] = cname2cid[cname]
                     _difficult = int(obj.find('difficult').text)
                     x1 = float(obj.find('bndbox').find('xmin').text)

+ 2 - 8
paddlex/cv/models/slim/prune.py

@@ -42,7 +42,7 @@ def sensitivity(program,
     if pruned_ratios is None:
         pruned_ratios = np.arange(0.1, 1, step=0.1)
 
-    total_evaluate_iters = 1
+    total_evaluate_iters = 0
     for name in param_names:
         if name not in sensitivities:
             sensitivities[name] = {}
@@ -52,12 +52,6 @@ def sensitivity(program,
                 len(list(pruned_ratios)) - len(sensitivities[name]))
     eta = '-'
     start_time = time.time()
-    progress = 1.0 / total_evaluate_iters
-    progress = "%.2f%%" % (progress * 100)
-    logging.info(
-        "Total evaluate iters={}, current={}, progress={}, eta={}".format(
-            total_evaluate_iters, 1, progress, eta),
-        use_color=True)
     baseline = eval_func(graph.program)
     cost = time.time() - start_time
     eta = cost * (total_evaluate_iters - 1)
@@ -73,7 +67,7 @@ def sensitivity(program,
             logging.info(
                 "Total evaluate iters={}, current={}, progress={}, eta={}".
                 format(
-                    total_evaluate_iters, current_iter+1, progress,
+                    total_evaluate_iters, current_iter, progress,
                     seconds_to_hms(
                         int(cost * (total_evaluate_iters - current_iter)))),
                 use_color=True)

+ 1 - 1
paddlex/cv/models/slim/visualize.py

@@ -50,7 +50,7 @@ def visualize(model, sensitivities_file, save_dir='./'):
         min(np.array(x)) - 0.01,
         max(np.array(x)) + 0.01, 0.05)
     my_y_ticks = np.arange(0.05, 1, 0.05)
-    plt.xticks(my_x_ticks, rotation=30, fontsize=8)
+    plt.xticks(my_x_ticks, rotation=15, fontsize=8)
     plt.yticks(my_y_ticks, fontsize=8)
     for a, b in zip(x, y):
         plt.text(

+ 2 - 2
paddlex/cv/transforms/cls_transforms.py

@@ -103,14 +103,14 @@ class RandomCrop(ClsTransform):
 
     Args:
         crop_size (int): Target edge length after the random crop is resized. Default: 224.
-        lower_scale (float): Lower bound on the ratio of the cropped area to the original area. Default: 0.88.
+        lower_scale (float): Lower bound on the ratio of the cropped area to the original area. Default: 0.08.
         lower_ratio (float): Lower bound on the aspect-ratio variation. Default: 3. / 4.
         upper_ratio (float): Upper bound on the aspect-ratio variation. Default: 4. / 3.
     """
 
     def __init__(self,
                  crop_size=224,
-                 lower_scale=0.88,
+                 lower_scale=0.08,
                  lower_ratio=3. / 4,
                  upper_ratio=4. / 3):
         self.crop_size = crop_size

+ 1 - 1
paddlex/cv/transforms/imgaug_support.py

@@ -23,7 +23,7 @@ def execute_imgaug(augmenter, im, bboxes=None, polygons=None,
     import imgaug.augmentables.bbs as bbs
 
     aug_im = im.astype('uint8')
-    aug_im = augmenter.augment(image=aug_im)
+    aug_im = augmenter.augment(image=aug_im).astype('float32')
     return aug_im
 
     # TODO imgaug's annotation handling differs in part from the transforms already in paddlex

+ 4 - 3
paddlex/interpret/as_data_reader/readers.py

@@ -20,6 +20,7 @@ import six
 import glob
 from .data_path_utils import _find_classes
 from PIL import Image
+import paddlex.utils.logging as logging
 
 
 def resize_short(img, target_size, interpolation=None):
@@ -117,7 +118,7 @@ def read_image(img_path, target_size=256, crop_size=224):
         assert len(img_path.shape) == 4
         return img_path
     else:
-        ValueError(f"Not recognized data type {type(img_path)}.")
+        raise ValueError("Not recognized data type {}.".format(type(img_path)))
 
 
 class ReaderConfig(object):
@@ -156,7 +157,7 @@ class ReaderConfig(object):
 
                 img = cv2.imread(img_path)
                 if img is None:
-                    print(img_path)
+                    logging.info(img_path)
                     continue
                 img = resize_short(img, target_size, interpolation=None)
                 img = crop_image(img, crop_size, center=self.is_test)
@@ -208,7 +209,7 @@ def create_reader(list_image_path, list_label=None, is_test=False):
 
             img = cv2.imread(img_path)
             if img is None:
-                print(img_path)
+                logging.info(img_path)
                 continue
 
             img = resize_short(img, target_size, interpolation=None)

+ 28 - 22
paddlex/interpret/core/interpretation_algorithms.py

@@ -21,6 +21,7 @@ from . import lime_base
 from ._session_preparation import paddle_get_fc_weights, compute_features_for_kmeans, gen_user_home
 from .normlime_base import combine_normlime_and_lime, get_feature_for_kmeans, load_kmeans_model
 from paddlex.interpret.as_data_reader.readers import read_image
+import paddlex.utils.logging as logging
 
 
 import cv2
@@ -71,7 +72,8 @@ class CAM(object):
         if self.label_names is not None:
             ln = self.label_names[l]
 
-        print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}')
+        prob_str = "%.3f" % (probability[pred_label[0]])
+        logging.info("predicted result: {} with probability {}.".format(ln, prob_str))
         return feature_maps, fc_weights
 
     def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
@@ -96,7 +98,8 @@ class CAM(object):
                 ax.axis("off")
             axes = axes.ravel()
             axes[0].imshow(self.image)
-            axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")
+            prob_str = "{%.3f}" % (self.predicted_probability)
+            axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
 
             axes[1].imshow(cam)
             axes[1].set_title("CAM")
@@ -157,14 +160,15 @@ class LIME(object):
         if self.label_names is not None:
             ln = self.label_names[l]
             
-        print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}')
+        prob_str = "%.3f" % (probability[pred_label[0]])
+        logging.info("predicted result: {} with probability {}.".format(ln, prob_str))
 
         end = time.time()
         algo = lime_base.LimeImageInterpreter()
         interpreter = algo.interpret_instance(self.image, self.predict_fn, self.labels, 0,
                                               num_samples=self.num_samples, batch_size=self.batch_size)
         self.lime_interpreter = interpreter
-        print('lime time: ', time.time() - end, 's.')
+        logging.info('lime time: ' + str(time.time() - end) + 's.')
 
     def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
         if self.lime_interpreter is None:
@@ -189,7 +193,8 @@ class LIME(object):
                 ax.axis("off")
             axes = axes.ravel()
             axes[0].imshow(self.image)
-            axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")
+            prob_str = "{%.3f}" % (self.predicted_probability)
+            axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
 
             axes[1].imshow(mark_boundaries(self.image, self.lime_interpreter.segments))
             axes[1].set_title("superpixel segmentation")
@@ -201,7 +206,7 @@ class LIME(object):
                     l, positive_only=False, hide_rest=False, num_features=num_to_show
                 )
                 axes[ncols + i].imshow(mark_boundaries(temp, mask))
-                axes[ncols + i].set_title(f"label {ln}, first {num_to_show} superpixels")
+                axes[ncols + i].set_title("label {}, first {} superpixels".format(ln, num_to_show))
 
         if save_to_disk and save_outdir is not None:
             os.makedirs(save_outdir, exist_ok=True)
@@ -232,8 +237,9 @@ class NormLIME(object):
                 raise ValueError("NormLIME needs the KMeans model, where we provided a default one in "
                                  "pre_models/kmeans_model.pkl.")
         else:
-            print("Warning: It is *strongly* suggested to use the default KMeans model in pre_models/kmeans_model.pkl. "
-                  "Use another one will change the final result.")
+            logging.debug("Warning: It is *strongly* suggested to use the \
+            default KMeans model in pre_models/kmeans_model.pkl. \
+            Use another one will change the final result.")
             self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime)
 
         self.num_samples = num_samples
@@ -243,7 +249,7 @@ class NormLIME(object):
             self.normlime_weights = np.load(normlime_weights, allow_pickle=True).item()
         except:
             self.normlime_weights = None
-            print("Warning: not find the correct precomputed Normlime result.")
+            logging.debug("Warning: not find the correct precomputed Normlime result.")
 
         self.predict_fn = predict_fn
 
@@ -289,8 +295,7 @@ class NormLIME(object):
         self.predicted_probability = self._lime.predicted_probability
         self.image = image_show[0]
         self.labels = self._lime.labels
-        # print(f'predicted result: {self.predicted_label} with probability {self.predicted_probability: .3f}')
-        print('performing NormLIME operations ...')
+        logging.info('performing NormLIME operations ...')
 
         cluster_labels = self.predict_cluster_labels(
             compute_features_for_kmeans(image_show).transpose((1, 2, 0)), self._lime.lime_interpreter.segments
@@ -329,7 +334,8 @@ class NormLIME(object):
 
             axes = axes.ravel()
             axes[0].imshow(self.image)
-            axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")
+            prob_str = "{%.3f}" % (self.predicted_probability)
+            axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
 
             axes[1].imshow(mark_boundaries(self.image, self._lime.lime_interpreter.segments))
             axes[1].set_title("superpixel segmentation")
@@ -342,7 +348,7 @@ class NormLIME(object):
                     l, positive_only=False, hide_rest=False, num_features=num_to_show
                 )
                 axes[ncols + i].imshow(mark_boundaries(temp, mask))
-                axes[ncols + i].set_title(f"LIME: first {num_to_show} superpixels")
+                axes[ncols + i].set_title("LIME: first {} superpixels".format(num_to_show))
 
             # NormLIME visualization
             self._lime.lime_interpreter.local_weights = g_weights
@@ -351,7 +357,7 @@ class NormLIME(object):
                     l, positive_only=False, hide_rest=False, num_features=num_to_show
                 )
                 axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask))
-                axes[ncols * 2 + i].set_title(f"NormLIME: first {num_to_show} superpixels")
+                axes[ncols * 2 + i].set_title("NormLIME: first {} superpixels".format(num_to_show))
 
             # NormLIME*LIME visualization
             combined_weights = combine_normlime_and_lime(lime_weights, g_weights)
@@ -361,7 +367,7 @@ class NormLIME(object):
                     l, positive_only=False, hide_rest=False, num_features=num_to_show
                 )
                 axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask))
-                axes[ncols * 3 + i].set_title(f"Combined: first {num_to_show} superpixels")
+                axes[ncols * 3 + i].set_title("Combined: first {} superpixels".format(num_to_show))
 
             self._lime.lime_interpreter.local_weights = lime_weights
 
@@ -433,32 +439,32 @@ def save_fig(data_, save_outdir, algorithm_name, num_samples=3000):
     import matplotlib.pyplot as plt
     if isinstance(data_, str):
         if algorithm_name == 'cam':
-            f_out = f"{algorithm_name}_{data_.split('/')[-1]}.png"
+            f_out = "{}_{}.png".format(algorithm_name, data_.split('/')[-1])
         else:
-            f_out = f"{algorithm_name}_{data_.split('/')[-1]}_s{num_samples}.png"
+            f_out = "{}_{}_s{}.png".format(algorithm_name, data_.split('/')[-1], num_samples)
         plt.savefig(
             os.path.join(save_outdir, f_out)
         )
     else:
         n = 0
         if algorithm_name == 'cam':
-            f_out = f'cam-{n}.png'
+            f_out = 'cam-{}.png'.format(n)
         else:
-            f_out = f'{algorithm_name}_s{num_samples}-{n}.png'
+            f_out = '{}_s{}-{}.png'.format(algorithm_name, num_samples, n)
         while os.path.exists(
                 os.path.join(save_outdir, f_out)
         ):
             n += 1
             if algorithm_name == 'cam':
-                f_out = f'cam-{n}.png'
+                f_out = 'cam-{}.png'.format(n)
             else:
-                f_out = f'{algorithm_name}_s{num_samples}-{n}.png'
+                f_out = '{}_s{}-{}.png'.format(algorithm_name, num_samples, n)
             continue
         plt.savefig(
             os.path.join(
                 save_outdir, f_out
             )
         )
-    print('The image of intrepretation result save in {}'.format(os.path.join(
+    logging.info('The image of the interpretation result is saved in {}'.format(os.path.join(
                 save_outdir, f_out
             )))

+ 4 - 4
paddlex/interpret/core/lime_base.py

@@ -34,6 +34,7 @@ import scipy as sp
 import tqdm
 import copy
 from functools import partial
+import paddlex.utils.logging as logging
 
 
 class LimeBase(object):
@@ -230,9 +231,9 @@ class LimeBase(object):
         local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1))
 
         if self.verbose:
-            print('Intercept', easy_model.intercept_)
-            print('Prediction_local', local_pred,)
-            print('Right:', neighborhood_labels[0, label])
+            logging.info('Intercept' + str(easy_model.intercept_))
+            logging.info('Prediction_local' + str(local_pred))
+            logging.info('Right:' + str(neighborhood_labels[0, label]))
         return (easy_model.intercept_,
                 sorted(zip(used_features, easy_model.coef_),
                        key=lambda x: np.abs(x[1]), reverse=True),
@@ -451,7 +452,6 @@ class LimeImageInterpreter(object):
             d = cdist(centroids, centroids, 'sqeuclidean')
 
             for x in np.unique(segments):
-                # print(np.argmin(d[x]))
                 a = [image[segments == i] for i in np.argsort(d[x])[1:6]]
                 mx = np.mean(np.concatenate(a), axis=0)
                 fudged_image[segments == x] = mx

+ 16 - 16
paddlex/interpret/core/normlime_base.py

@@ -21,6 +21,7 @@ from paddlex.interpret.as_data_reader.readers import read_image
 import paddlex.utils.logging as logging
 from . import lime_base
 from ._session_preparation import compute_features_for_kmeans, gen_user_home
+import paddlex.utils.logging as logging
 
 
 def load_kmeans_model(fname):
@@ -67,7 +68,6 @@ def centroid_using_superpixels(features, segments):
     one_list = np.zeros((len(np.unique(segments)), features.shape[2]))
     for i, r in enumerate(regions):
         one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] + 0.5), :]
-    # print(one_list.shape)
     return one_list
 
 
@@ -85,7 +85,7 @@ def precompute_normlime_weights(list_data_, predict_fn, num_samples=3000, batch_
     precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir)
 
     # load precomputed results, compute normlime weights and save.
-    fname_list = glob.glob(os.path.join(save_dir, f'lime_weights_s{num_samples}*.npy'))
+    fname_list = glob.glob(os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples)))
     return compute_normlime_weights(fname_list, save_dir, num_samples)
 
 
@@ -117,10 +117,10 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav
 
     for data_index, each_data_ in enumerate(list_data_):
         if isinstance(each_data_, str):
-            save_path = f"lime_weights_s{num_samples}_{each_data_.split('/')[-1].split('.')[0]}.npy"
+            save_path = "lime_weights_s{}_{}.npy".format(num_samples, each_data_.split('/')[-1].split('.')[0])
             save_path = os.path.join(save_dir, save_path)
         else:
-            save_path = f"lime_weights_s{num_samples}_{data_index}.npy"
+            save_path = "lime_weights_s{}_{}.npy".format(num_samples, data_index)
             save_path = os.path.join(save_dir, save_path)
 
         if os.path.exists(save_path):
@@ -174,16 +174,16 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav
 
 def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
     normlime_weights_all_labels = {}
+    
     for f in a_list_lime_fnames:
         try:
             lime_weights_and_cluster = np.load(f, allow_pickle=True).item()
             lime_weights = lime_weights_and_cluster['lime_weights']
             cluster = lime_weights_and_cluster['cluster']
         except:
-            print('When loading precomputed LIME result, skipping', f)
+            logging.info('When loading precomputed LIME result, skipping' + str(f))
             continue
-        print('Loading precomputed LIME result,', f)
-
+        logging.info('Loading precomputed LIME result,' + str(f))
         pred_labels = lime_weights.keys()
         for y in pred_labels:
             normlime_weights = normlime_weights_all_labels.get(y, {})
@@ -207,23 +207,23 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
 
     # check normlime
     if len(normlime_weights_all_labels.keys()) < max(normlime_weights_all_labels.keys()) + 1:
-        print(
-            "\n"
-            "Warning: !!! \n"
-            f"There are at least {max(normlime_weights_all_labels.keys()) + 1} classes, "
-            f"but the NormLIME has results of only {len(normlime_weights_all_labels.keys())} classes. \n"
-            "It may have cause unstable results in the later computation"
-            " but can be improved by computing more test samples."
+        logging.info(
+            "\n" + \
+            "Warning: !!! \n" + \
+            "There are at least {} classes, ".format(max(normlime_weights_all_labels.keys()) + 1) + \
+            "but the NormLIME has results of only {} classes. \n".format(len(normlime_weights_all_labels.keys())) + \
+            "It may have cause unstable results in the later computation" + \
+            " but can be improved by computing more test samples." + \
             "\n"
         )
 
     n = 0
-    f_out = f'normlime_weights_s{lime_num_samples}_samples_{len(a_list_lime_fnames)}-{n}.npy'
+    f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n)
     while os.path.exists(
             os.path.join(save_dir, f_out)
     ):
         n += 1
-        f_out = f'normlime_weights_s{lime_num_samples}_samples_{len(a_list_lime_fnames)}-{n}.npy'
+        f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n)
         continue
 
     np.save(

+ 3 - 5
paddlex/utils/utils.py

@@ -269,11 +269,9 @@ def load_pretrain_weights(exe,
         vars_to_load.append(var)
         logging.debug("Weight {} will be load".format(var.name))
 
-    fluid.io.load_vars(
-        executor=exe,
-        dirname=weights_dir,
-        main_program=main_prog,
-        vars=vars_to_load)
+    params_dict = fluid.io.load_program_state(
+        weights_dir, var_list=vars_to_load)
+    fluid.io.set_program_state(main_prog, params_dict)
     if len(vars_to_load) == 0:
         logging.warning(
             "There is no pretrain weights loaded, maybe you should check you pretrain model!"

+ 15 - 0
tools/codestyle/clang_format.hook

@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e
+
+readonly VERSION="3.8"
+
+version=$(clang-format -version)
+
+if ! [[ $version == *"$VERSION"* ]]; then
+    echo "clang-format version check failed."
+    echo "a version contains '$VERSION' is needed, but get '$version'"
+    echo "you can install the right version, and make an soft-link to '\$PATH' env"
+    exit -1
+fi
+
+clang-format $@

+ 27 - 0
tools/codestyle/cpplint_pre_commit.hook

@@ -0,0 +1,27 @@
+#!/bin/bash
+
+TOTAL_ERRORS=0
+if [[ ! $TRAVIS_BRANCH ]]; then
+  # install cpplint on local machine.
+  if [[ ! $(which cpplint) ]]; then
+    pip install cpplint
+  fi
+  # diff files on local machine. 
+  files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
+else
+  # diff files between PR and latest commit on Travis CI. 
+  branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
+  head_ref=$(git rev-parse HEAD)
+  files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
+fi
+# The trick to remove deleted files: https://stackoverflow.com/a/2413151
+for file in $files; do
+    if [[ $file =~ ^(patches/.*) ]]; then
+        continue;
+    else
+        cpplint --filter=-readability/fn_size $file;
+        TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
+    fi
+done
+
+exit $TOTAL_ERRORS