Browse Source

modify details

FlyingQianMM 5 years ago
parent
commit
393475941e

+ 12 - 14
deploy/cpp/src/paddlex.cpp

@@ -599,20 +599,18 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
       inputs_.im_size_before_resize_.pop_back();
       auto resize_w = before_shape[0];
       auto resize_h = before_shape[1];
-      if (mask_label->rows != resize_h || mask_label->cols != resize_w) {
-        cv::resize(mask_label,
-                   mask_label,
-                   cv::Size(resize_h, resize_w),
-                   0,
-                   0,
-                   cv::INTER_NEAREST);
-        cv::resize(mask_score,
-                   mask_score,
-                   cv::Size(resize_h, resize_w),
-                   0,
-                   0,
-                   cv::INTER_NEAREST);
-      }
+      cv::resize(mask_label,
+                 mask_label,
+                 cv::Size(resize_h, resize_w),
+                 0,
+                 0,
+                 cv::INTER_NEAREST);
+      cv::resize(mask_score,
+                 mask_score,
+                 cv::Size(resize_h, resize_w),
+                 0,
+                 0,
+                 cv::INTER_LINEAR);
     }
     ++idx;
   }

+ 2 - 4
deploy/cpp/src/transforms.cpp

@@ -152,10 +152,8 @@ bool Resize::Run(cv::Mat* im, ImageBlob* data) {
   data->im_size_before_resize_.push_back({im->rows, im->cols});
   data->reshape_order_.push_back("resize");
 
-  if (im->rows != height_ || im->cols != width_) {
-    cv::resize(
-        *im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
-  }
+  cv::resize(
+      *im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
   data->new_im_size_[0] = im->rows;
   data->new_im_size_[1] = im->cols;
   return true;

+ 18 - 16
examples/meter_reader/README.md

@@ -28,13 +28,13 @@
 
 | 表计测试图片                                                 | 表计检测数据集                                               | 指针和刻度分割数据集                                         |
 | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| [meter_test](https://bj.bcebos.com/paddlex/meterreader/datasets/meter_test.tar.gz) | [meter_det](https://bj.bcebos.com/paddlex/meterreader/datasets/meter_det.tar.gz) | [meter_seg](https://bj.bcebos.com/paddlex/meterreader/datasets/meter_seg.tar.gz) |
+| [meter_test](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_test.tar.gz) | [meter_det](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_det.tar.gz) | [meter_seg](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_seg.tar.gz) |
 
 本案例开放了预先训练好的检测模型和语义分割模型,可以使用这些模型快速体验表计读数全流程,也可以直接将这些模型部署在服务器端或jetson嵌入式设备上进行推理预测。
 
 | 表计检测模型                                                 | 指针和刻度分割模型                                           |
 | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| [meter_det_inference_model](https://bj.bcebos.com/paddlex/meterreader/models/meter_det_inference_model.tar.gz) | [meter_seg_inference_model](https://bj.bcebos.com/paddlex/meterreader/models/meter_seg_inference_model.tar.gz) |
+| [meter_det_inference_model](https://bj.bcebos.com/paddlex/examples/meter_reader/models/meter_det_inference_model.tar.gz) | [meter_seg_inference_model](https://bj.bcebos.com/paddlex/examples/meter_reader/models/meter_seg_inference_model.tar.gz) |
 
 
 ## <h2 id="3">快速体验表盘读数</h2>
@@ -77,6 +77,7 @@ cd PaddleX/examples/meter_reader/
 | seg_batch_size | 分割的批量大小,默认为2 |
 | seg_thread_num	| 分割预测的线程数,默认为cpu处理器个数 |
 | use_camera | 是否使用摄像头采集图片,默认为False |
+| camera_id | 摄像头设备ID,默认值为0 |
 | use_erode | 是否使用图像腐蚀对分割预测图进行细分,默认为False |
 | erode_kernel | 图像腐蚀操作时的卷积核大小,默认值为4 |
 
@@ -122,7 +123,7 @@ python3 reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_
 git clone https://github.com/PaddlePaddle/PaddleX
 ```
 
-2. 将`PaddleX\examples\meter_reader\deploy\cpp`下的`meter`文件夹和`CMakeList.txt`拷贝至`PaddleX\deploy\cpp`目录下,拷贝之前可以将`PaddleX\deploy\cpp`下原本的`CMakeList.txt`做好备份。
+2. 将`PaddleX\examples\meter_reader\deploy\cpp`下的`meter_reader`文件夹和`CMakeList.txt`拷贝至`PaddleX\deploy\cpp`目录下,拷贝之前可以将`PaddleX\deploy\cpp`下原本的`CMakeList.txt`做好备份。
 
 3. 按照[Windows平台部署](https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md)中的Step2至Step4完成C++预测代码的编译。
 
@@ -132,7 +133,7 @@ git clone https://github.com/PaddlePaddle/PaddleX
    cd PaddleX\deploy\cpp\out\build\x64-Release
    ```
 
-   预测程序为paddle_inference\meter.exe,其主要命令参数说明如下:
+   预测程序为paddlex_inference\meter_reader.exe,其主要命令参数说明如下:
 
    | 参数    | 说明   |
    | ---- | ---- |
@@ -151,7 +152,7 @@ git clone https://github.com/PaddlePaddle/PaddleX
    | camera_id | 摄像头设备ID,默认值为0 |
    | use_erode | 是否使用图像腐蚀对分割预测图进行去噪,支持值为0或1(默认值为1) |
    | erode_kernel | 图像腐蚀操作时的卷积核大小,默认值为4 |
-
+   | score_threshold | 检测模型输出结果中,预测得分低于该阈值的框将被滤除,默认值为0.5|
 
 5. 推理预测:
 
@@ -160,19 +161,19 @@ git clone https://github.com/PaddlePaddle/PaddleX
   * 使用未加密的模型对单张图片做预测
 
   ```shell
-  .\paddlex_inference\meter.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --image=\path\to\meter_test\20190822_168.jpg --use_gpu=1 --use_erode=1 --save_dir=output
+  .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --image=\path\to\meter_test\20190822_168.jpg --use_gpu=1 --use_erode=1 --save_dir=output
   ```
 
   * 使用未加密的模型对图像列表做预测
 
   ```shell
-  .\paddlex_inference\meter.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --image_list=\path\to\meter_test\image_list.txt --use_gpu=1 --use_erode=1 --save_dir=output
+  .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --image_list=\path\to\meter_test\image_list.txt --use_gpu=1 --use_erode=1 --save_dir=output
   ```
 
   * 使用未加密的模型开启摄像头做预测
 
   ```shell
-  .\paddlex_inference\meter.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --use_camera=1 --use_gpu=1 --use_erode=1 --save_dir=output
+  .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --use_camera=1 --use_gpu=1 --use_erode=1 --save_dir=output
   ```
 
   * 使用加密后的模型对单张图片做预测
@@ -180,7 +181,7 @@ git clone https://github.com/PaddlePaddle/PaddleX
   如果未对模型进行加密,请参考[加密PaddleX模型](https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/deploy_server/encryption.html#paddlex)对模型进行加密。例如加密后的检测模型所在目录为`\path\to\encrypted_det_inference_model`,密钥为`yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0=`;加密后的分割模型所在目录为`\path\to\encrypted_seg_inference_model`,密钥为`DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=`
 
   ```shell
-  .\paddlex_inference\meter.exe --det_model_dir=\path\to\encrypted_det_inference_model --seg_model_dir=\path\to\encrypted_seg_inference_model --image=\path\to\test.jpg --use_gpu=1 --use_erode=1 --save_dir=output --det_key yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0= --seg_key DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=
+  .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\encrypted_det_inference_model --seg_model_dir=\path\to\encrypted_seg_inference_model --image=\path\to\test.jpg --use_gpu=1 --use_erode=1 --save_dir=output --det_key yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0= --seg_key DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=
   ```
 
 ### Linux系统的jetson嵌入式设备安全部署
@@ -193,12 +194,12 @@ git clone https://github.com/PaddlePaddle/PaddleX
 git clone https://github.com/PaddlePaddle/PaddleX
 ```
 
-2. 将`PaddleX/examples/meter_readerdeploy/cpp`下的`meter`文件夹和`CMakeList.txt`拷贝至`PaddleX/deploy/cpp`目录下,拷贝之前可以将`PaddleX/deploy/cpp`下原本的`CMakeList.txt`做好备份。
+2. 将`PaddleX/examples/meter_reader/deploy/cpp`下的`meter_reader`文件夹和`CMakeList.txt`拷贝至`PaddleX/deploy/cpp`目录下,拷贝之前可以将`PaddleX/deploy/cpp`下原本的`CMakeList.txt`做好备份。
 
 3. 按照[Nvidia-Jetson开发板部署]()中的Step2至Step3完成C++预测代码的编译。
 
-4. 编译成功后,可执行程为`build/meter/detector`,其主要命令参数说明如下:
-	
+4. 编译成功后,可执行程序为`build/meter_reader/meter_reader`,其主要命令参数说明如下:
+
   | 参数    | 说明   |
   | ---- | ---- |
   |  det_model_dir    | 表计检测模型路径     |
@@ -216,6 +217,7 @@ git clone https://github.com/PaddlePaddle/PaddleX
   | camera_id | 摄像头设备ID,默认值为0 |
   | use_erode | 是否使用图像腐蚀对分割预测图进行细分,支持值为0或1(默认值为1) |
   | erode_kernel | 图像腐蚀操作时的卷积核大小,默认值为4 |
+  | score_threshold | 检测模型输出结果中,预测得分低于该阈值的框将被滤除,默认值为0.5|
 
 5. 推理预测:
 
@@ -224,19 +226,19 @@ git clone https://github.com/PaddlePaddle/PaddleX
   * 使用未加密的模型对单张图片做预测
 
   ```shell
-  ./build/meter/meter --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --image=/path/to/meter_test/20190822_168.jpg --use_gpu=1 --use_erode=1 --save_dir=output
+  ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --image=/path/to/meter_test/20190822_168.jpg --use_gpu=1 --use_erode=1 --save_dir=output
   ```
 
   * 使用未加密的模型对图像列表做预测
 
   ```shell
-  ./build/meter/meter --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --image_list=/path/to/image_list.txt --use_gpu=1 --use_erode=1 --save_dir=output
+  ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --image_list=/path/to/image_list.txt --use_gpu=1 --use_erode=1 --save_dir=output
   ```
 
   * 使用未加密的模型开启摄像头做预测
 
   ```shell
-  ./build/meter/meter --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --use_camera=1 --use_gpu=1 --use_erode=1 --save_dir=output
+  ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --use_camera=1 --use_gpu=1 --use_erode=1 --save_dir=output
   ```
 
   * 使用加密后的模型对单张图片做预测
@@ -244,7 +246,7 @@ git clone https://github.com/PaddlePaddle/PaddleX
   如果未对模型进行加密,请参考[加密PaddleX模型](https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/deploy_server/encryption.html#paddlex)对模型进行加密。例如加密后的检测模型所在目录为`/path/to/encrypted_det_inference_model`,密钥为`yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0=`;加密后的分割模型所在目录为`/path/to/encrypted_seg_inference_model`,密钥为`DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=`
 
   ```shell
-  ./build/meter/meter --det_model_dir=/path/to/encrypted_det_inference_model --seg_model_dir=/path/to/encrypted_seg_inference_model --image=/path/to/test.jpg --use_gpu=1 --use_erode=1 --save_dir=output --det_key yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0= --seg_key DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=
+  ./build/meter_reader/meter_reader --det_model_dir=/path/to/encrypted_det_inference_model --seg_model_dir=/path/to/encrypted_seg_inference_model --image=/path/to/test.jpg --use_gpu=1 --use_erode=1 --save_dir=output --det_key yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0= --seg_key DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=
   ```
 
 

+ 11 - 11
examples/meter_reader/deploy/cpp/CMakeLists.txt

@@ -1,11 +1,11 @@
 cmake_minimum_required(VERSION 3.0)
 project(PaddleX CXX C)
 
-option(WITH_MKL        "Compile meter with MKL/OpenBlas support,defaultuseMKL."          ON)
-option(WITH_GPU        "Compile meter with GPU/CPU, default use CPU."                    ON)
-option(WITH_STATIC_LIB "Compile meter with static/shared library, default use static."   OFF)
-option(WITH_TENSORRT "Compile meter with TensorRT."   OFF)
-option(WITH_ENCRYPTION "Compile meter with encryption tool."   OFF)
+option(WITH_MKL        "Compile meter_reader with MKL/OpenBlas support,defaultuseMKL."          ON)
+option(WITH_GPU        "Compile meter_reader with GPU/CPU, default use CPU."                    ON)
+option(WITH_STATIC_LIB "Compile meter_reader with static/shared library, default use static."   OFF)
+option(WITH_TENSORRT "Compile meter_reader with TensorRT."   OFF)
+option(WITH_ENCRYPTION "Compile meter_reader with encryption tool."   OFF)
 
 SET(TENSORRT_DIR "" CACHE PATH "Location of libraries")
 SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
@@ -16,7 +16,7 @@ SET(CUDA_LIB "" CACHE PATH "Location of libraries")
 if (NOT WIN32)
     set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
     set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
-    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/meter)
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/meter_reader)
 else()
     set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/paddlex_inference)
     set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/paddlex_inference)
@@ -275,12 +275,12 @@ add_library(paddlex_inference SHARED src/visualize src/transforms.cpp src/paddle
 ADD_DEPENDENCIES(paddlex_inference ext-yaml-cpp)
 target_link_libraries(paddlex_inference ${DEPS})
 
-add_executable(meter meter/meter.cpp meter/global.cpp meter/readvalue.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
-ADD_DEPENDENCIES(meter ext-yaml-cpp)
-target_link_libraries(meter ${DEPS})
+add_executable(meter_reader meter_reader/meter_reader.cpp meter_reader/global.cpp meter_reader/postprocess.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
+ADD_DEPENDENCIES(meter_reader ext-yaml-cpp)
+target_link_libraries(meter_reader ${DEPS})
 
 if (WIN32 AND WITH_MKL)
-    add_custom_command(TARGET meter POST_BUILD
+    add_custom_command(TARGET meter_reader POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll
         COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll
         COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
@@ -290,7 +290,7 @@ if (WIN32 AND WITH_MKL)
     )
     # for encryption
     if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll")
-        add_custom_command(TARGET meter POST_BUILD
+        add_custom_command(TARGET meter_reader POST_BUILD
             COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
             COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
         )

+ 1 - 1
examples/meter_reader/deploy/cpp/meter/global.cpp → examples/meter_reader/deploy/cpp/meter_reader/global.cpp

@@ -21,7 +21,7 @@
 #include <opencv2/highgui.hpp>
 #include <opencv2/core/core.hpp>
 
-#include "meter/global.h"
+#include "meter_reader/global.h"
 
 std::vector<int> IMAGE_SHAPE = {1920, 1080};
 std::vector<int> RESULT_SHAPE = {1280, 720};

+ 0 - 0
examples/meter_reader/deploy/cpp/meter/global.h → examples/meter_reader/deploy/cpp/meter_reader/global.h


+ 33 - 15
examples/meter_reader/deploy/cpp/meter/meter.cpp → examples/meter_reader/deploy/cpp/meter_reader/meter_reader.cpp

@@ -26,8 +26,8 @@
 #include <opencv2/highgui.hpp>
 #include <opencv2/core/core.hpp>
 
-#include "meter/global.h"
-#include "meter/readvalue.h"
+#include "meter_reader/global.h"
+#include "meter_reader/postprocess.h"
 #include "include/paddlex/paddlex.h"
 #include "include/paddlex/visualize.h"
 
@@ -51,16 +51,31 @@ DEFINE_string(seg_key, "", "Segmenter model key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
+DEFINE_double(score_threshold, 0.5, "Detected bbox whose score is lower than this threshold is filtered");
 
 void predict(const cv::Mat &input_image, PaddleX::Model *det_model,
              PaddleX::Model *seg_model, const std::string save_dir,
              const std::string image_path, const bool use_erode,
              const int erode_kernel, const int thread_num,
-             const int seg_batch_size) {
+             const int seg_batch_size, const double threshold) {
   PaddleX::DetResult det_result;
   det_model->predict(input_image, &det_result);
 
-  int meter_num = det_result.boxes.size();
+  PaddleX::DetResult filter_result;
+  int num_bboxes = det_result.boxes.size();
+  for (int i = 0; i < num_bboxes; ++i) {
+    double score = det_result.boxes[i].score;
+    if (score > threshold || score == threshold) {
+      PaddleX::Box box;
+      box.category_id = det_result.boxes[i].category_id;
+      box.category = det_result.boxes[i].category;
+      box.score = det_result.boxes[i].score;
+      box.coordinate = det_result.boxes[i].coordinate;
+      filter_result.boxes.push_back(std::move(box));
+    }
+  }
+
+  int meter_num = filter_result.boxes.size();
   if (!meter_num) {
       std::cout << "Don't find any meter." << std::endl;
       return;
@@ -74,10 +89,10 @@ void predict(const cv::Mat &input_image, PaddleX::Model *det_model,
     int batch_thread_num = std::min(thread_num, im_vec_size - i);
     #pragma omp parallel for num_threads(batch_thread_num)
     for (int j = i; j < im_vec_size; ++j) {
-      int left = static_cast<int>(det_result.boxes[j].coordinate[0]);
-      int top = static_cast<int>(det_result.boxes[j].coordinate[1]);
-      int width = static_cast<int>(det_result.boxes[j].coordinate[2]);
-      int height = static_cast<int>(det_result.boxes[j].coordinate[3]);
+      int left = static_cast<int>(filter_result.boxes[j].coordinate[0]);
+      int top = static_cast<int>(filter_result.boxes[j].coordinate[1]);
+      int width = static_cast<int>(filter_result.boxes[j].coordinate[2]);
+      int height = static_cast<int>(filter_result.boxes[j].coordinate[3]);
       int right = left + width - 1;
       int bottom = top + height - 1;
 
@@ -142,10 +157,10 @@ void predict(const cv::Mat &input_image, PaddleX::Model *det_model,
               << " -- result: " << result
               << " --" << std::endl;
 
-    int lx = static_cast<int>(det_result.boxes[i].coordinate[0]);
-    int ly = static_cast<int>(det_result.boxes[i].coordinate[1]);
-    int w = static_cast<int>(det_result.boxes[i].coordinate[2]);
-    int h = static_cast<int>(det_result.boxes[i].coordinate[3]);
+    int lx = static_cast<int>(filter_result.boxes[i].coordinate[0]);
+    int ly = static_cast<int>(filter_result.boxes[i].coordinate[1]);
+    int w = static_cast<int>(filter_result.boxes[i].coordinate[2]);
+    int h = static_cast<int>(filter_result.boxes[i].coordinate[3]);
 
     cv::Rect bounding_box = cv::Rect(lx, ly, w, h) &
         cv::Rect(0, 0, output_image.cols, output_image.rows);
@@ -223,7 +238,8 @@ int main(int argc, char **argv) {
       std::string ext_name = ".jpg";
       predict(im, &det_model, &seg_model, FLAGS_save_dir,
               std::to_string(imgs) + ext_name, FLAGS_use_erode,
-              FLAGS_erode_kernel, FLAGS_thread_num, FLAGS_seg_batch_size);
+              FLAGS_erode_kernel, FLAGS_thread_num,
+              FLAGS_seg_batch_size, FLAGS_score_threshold);
       imgs++;
       auto imread_duration = duration_cast<microseconds>(imread_end - start);
       total_imread_time_s += static_cast<double>(imread_duration.count()) *
@@ -254,7 +270,8 @@ int main(int argc, char **argv) {
 
         predict(im, &det_model, &seg_model, FLAGS_save_dir,
                 image_path, FLAGS_use_erode, FLAGS_erode_kernel,
-                FLAGS_thread_num, FLAGS_seg_batch_size);
+                FLAGS_thread_num, FLAGS_seg_batch_size,
+                FLAGS_score_threshold);
 
         auto imread_duration = duration_cast<microseconds>(imread_end - start);
         total_imread_time_s += static_cast<double>(imread_duration.count()) *
@@ -274,7 +291,8 @@ int main(int argc, char **argv) {
 
       predict(im, &det_model, &seg_model, FLAGS_save_dir,
               FLAGS_image, FLAGS_use_erode, FLAGS_erode_kernel,
-              FLAGS_thread_num, FLAGS_seg_batch_size);
+              FLAGS_thread_num, FLAGS_seg_batch_size,
+              FLAGS_score_threshold);
 
       auto imread_duration = duration_cast<microseconds>(imread_end - start);
       total_imread_time_s += static_cast<double>(imread_duration.count()) *

+ 2 - 2
examples/meter_reader/deploy/cpp/meter/readvalue.cpp → examples/meter_reader/deploy/cpp/meter_reader/postprocess.cpp

@@ -24,8 +24,8 @@
 #include <opencv2/highgui.hpp>
 #include <opencv2/core/core.hpp>
 
-#include "meter/global.h"
-#include "meter/readvalue.h"
+#include "meter_reader/global.h"
+#include "meter_reader/postprocess.h"
 
 using namespace std::chrono;  // NOLINT
 

+ 0 - 0
examples/meter_reader/deploy/cpp/meter/readvalue.h → examples/meter_reader/deploy/cpp/meter_reader/postprocess.h


+ 10 - 4
examples/meter_reader/deploy/python/reader_deploy.py

@@ -71,6 +71,12 @@ def parse_args():
         help='Whether use camera or not',
         action='store_true')
     parser.add_argument(
+        '--camera_id',
+        dest='camera_id',
+        type=int,
+        help='The camera id',
+        default=0)
+    parser.add_argument(
         '--use_erode',
         dest='use_erode',
         help='Whether erode the predicted lable map',
@@ -188,9 +194,9 @@ class MeterReader:
                 thread_num=seg_thread_num)
             if use_erode:
                 kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
-                for i in range(len(seg_results)):
-                    result[i]['label_map'] = cv2.erode(
-                        result[i]['label_map'], kernel)
+                for i in range(len(result)):
+                    result[i]['label_map'] = cv2.erode(result[i]['label_map'],
+                                                       kernel)
             seg_results.extend(result)
 
         results = list()
@@ -330,7 +336,7 @@ def infer(args):
                                  args.erode_kernel, args.score_threshold,
                                  args.seg_batch_size, args.seg_thread_num)
     elif args.with_camera:
-        cap_video = cv2.VideoCapture(0)
+        cap_video = cv2.VideoCapture(args.camera_id)
         if not cap_video.isOpened():
             raise Exception(
                 "Error opening video stream, please make sure the camera is working"

BIN
examples/meter_reader/image/MeterReader_Architecture.jpg


+ 10 - 4
examples/meter_reader/reader_infer.py

@@ -71,6 +71,12 @@ def parse_args():
         help='Whether use camera or not',
         action='store_true')
     parser.add_argument(
+        '--camera_id',
+        dest='camera_id',
+        type=int,
+        help='The camera id',
+        default=0)
+    parser.add_argument(
         '--use_erode',
         dest='use_erode',
         help='Whether erode the predicted lable map',
@@ -189,8 +195,8 @@ class MeterReader:
             if use_erode:
                 kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
                 for i in range(len(result)):
-                    result[i]['label_map'] = cv2.erode(
-                        result[i]['label_map'], kernel)
+                    result[i]['label_map'] = cv2.erode(result[i]['label_map'],
+                                                       kernel)
             seg_results.extend(result)
 
         results = list()
@@ -322,7 +328,7 @@ def infer(args):
                 continue
             im_file = osp.join(args.image_dir, im_file)
             image_lists.append(im_file)
-    
+
     meter_reader = MeterReader(args.detector_dir, args.segmenter_dir)
     if len(image_lists) > 0:
         for im_file in image_lists:
@@ -330,7 +336,7 @@ def infer(args):
                                  args.erode_kernel, args.score_threshold,
                                  args.seg_batch_size, args.seg_thread_num)
     elif args.with_camera:
-        cap_video = cv2.VideoCapture(0)
+        cap_video = cv2.VideoCapture(args.camera_id)
         if not cap_video.isOpened():
             raise Exception(
                 "Error opening video stream, please make sure the camera is working"

+ 16 - 6
examples/meter_reader/train_detection.py

@@ -10,14 +10,24 @@ meter_det_dataset = 'https://bj.bcebos.com/paddlex/meterreader/datasets/meter_de
 pdx.utils.download_and_decompress(meter_det_dataset, path='./')
 
 # 定义训练和验证时的transforms
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
-train_transforms = transforms.ComposedYOLOv3Transforms(
-    mode='train', shape=[608, 608])
-eval_transforms = transforms.ComposedYOLOv3Transforms(
-    mode='eval', shape=[608, 608])
+train_transforms = transforms.Compose([
+    transforms.MixupImage(mixup_epoch=250),
+    transforms.RandomDistort(),
+    transforms.RandomExpand(),
+    transforms.RandomCrop(),
+    transforms.Resize(
+        target_size=608, interp='RANDOM'),
+    transforms.RandomHorizontalFlip(),
+    transforms.Normalize(),
+])
+
+eval_transforms = transforms.Compose([
+    transforms.Resize(
+        target_size=608, interp='CUBIC'),
+    transforms.Normalize(),
+])
 
 # 定义训练和验证所用的数据集
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
 train_dataset = pdx.datasets.CocoDetection(
     data_dir='meter_det/train/',
     ann_file='meter_det/annotations/instance_train.json',

+ 1 - 1
paddlex/cv/datasets/__init__.py

@@ -19,4 +19,4 @@ from .seg_dataset import SegDataset
 from .easydata_cls import EasyDataCls
 from .easydata_det import EasyDataDet
 from .easydata_seg import EasyDataSeg
-from .dataset import GenerateMiniBatch
+from .dataset import generate_minibatch

+ 20 - 7
paddlex/cv/datasets/dataset.py

@@ -114,7 +114,7 @@ def multithread_reader(mapper,
         while not isinstance(sample, EndSignal):
             batch_data.append(sample)
             if len(batch_data) == batch_size:
-                batch_data = GenerateMiniBatch(batch_data)
+                batch_data = generate_minibatch(batch_data)
                 yield batch_data
                 batch_data = []
             sample = out_queue.get()
@@ -126,11 +126,11 @@ def multithread_reader(mapper,
             else:
                 batch_data.append(sample)
                 if len(batch_data) == batch_size:
-                    batch_data = GenerateMiniBatch(batch_data)
+                    batch_data = generate_minibatch(batch_data)
                     yield batch_data
                     batch_data = []
         if not drop_last and len(batch_data) != 0:
-            batch_data = GenerateMiniBatch(batch_data)
+            batch_data = generate_minibatch(batch_data)
             yield batch_data
             batch_data = []
 
@@ -187,49 +187,62 @@ def multiprocess_reader(mapper,
             else:
                 batch_data.append(sample)
                 if len(batch_data) == batch_size:
-                    batch_data = GenerateMiniBatch(batch_data)
+                    batch_data = generate_minibatch(batch_data)
                     yield batch_data
                     batch_data = []
         if len(batch_data) != 0 and not drop_last:
-            batch_data = GenerateMiniBatch(batch_data)
+            batch_data = generate_minibatch(batch_data)
             yield batch_data
             batch_data = []
 
     return queue_reader
 
 
-def GenerateMiniBatch(batch_data):
+def generate_minibatch(batch_data, label_padding_value=255):
+    # if batch_size is 1, do not pad the image
     if len(batch_data) == 1:
         return batch_data
     width = [data[0].shape[2] for data in batch_data]
     height = [data[0].shape[1] for data in batch_data]
+    # if the sizes of images in a mini-batch are equal,
+    # do not pad the image
     if len(set(width)) == 1 and len(set(height)) == 1:
         return batch_data
     max_shape = np.array([data[0].shape for data in batch_data]).max(axis=0)
     padding_batch = []
     for data in batch_data:
+        # pad the image to a same size
         im_c, im_h, im_w = data[0].shape[:]
         padding_im = np.zeros(
             (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
         padding_im[:, :im_h, :im_w] = data[0]
         if len(data) > 1:
             if isinstance(data[1], np.ndarray):
+                # padding the image and label of segmentation
+                # during the training  and evaluating phase
                 padding_label = np.zeros(
-                    (1, max_shape[1], max_shape[2])).astype('int64')
+                    (1, max_shape[1], max_shape[2]
+                     )).astype('int64') + label_padding_value
                 _, label_h, label_w = data[1].shape
                 padding_label[:, :label_h, :label_w] = data[1]
                 padding_batch.append((padding_im, padding_label))
             elif len(data[1]) == 0 or isinstance(
                     data[1][0],
                     tuple) and data[1][0][0] in ['resize', 'padding']:
+                # padding the image and insert 'padding' into `im_info`
+                # of segmentation during the inference phase
                 if len(data[1]) == 0 or 'padding' not in [
                         data[1][i][0] for i in range(len(data[1]))
                 ]:
                     data[1].append(('padding', [im_h, im_w]))
                 padding_batch.append((padding_im, ) + tuple(data[1:]))
             else:
+                # padding the image of detection, or
+                # padding the image of classification during the training
+                # and evaluating phase
                 padding_batch.append((padding_im, ) + tuple(data[1:]))
         else:
+            # padding the image of classification during the inference phase
             padding_batch.append((padding_im))
     return padding_batch
 

+ 2 - 2
paddlex/cv/models/classifier.py

@@ -23,7 +23,7 @@ import paddlex.utils.logging as logging
 from paddlex.utils import seconds_to_hms
 import paddlex
 from paddlex.cv.transforms import arrange_transforms
-from paddlex.cv.datasets import GenerateMiniBatch
+from paddlex.cv.datasets import generate_minibatch
 from collections import OrderedDict
 from .base import BaseAPI
 
@@ -289,7 +289,7 @@ class BaseClassifier(BaseAPI):
         batch_data = pool.map(transforms, images)
         pool.close()
         pool.join()
-        padding_batch = GenerateMiniBatch(batch_data)
+        padding_batch = generate_minibatch(batch_data)
         im = np.array([data[0] for data in padding_batch])
 
         return im

+ 3 - 12
paddlex/cv/models/deeplabv3p.py

@@ -23,7 +23,7 @@ import paddle.fluid as fluid
 import paddlex.utils.logging as logging
 import paddlex
 from paddlex.cv.transforms import arrange_transforms
-from paddlex.cv.datasets import GenerateMiniBatch
+from paddlex.cv.datasets import generate_minibatch
 from collections import OrderedDict
 from .base import BaseAPI
 from .utils.seg_eval import ConfusionMatrix
@@ -340,16 +340,7 @@ class DeepLabv3p(BaseAPI):
         for step, data in tqdm.tqdm(
                 enumerate(data_generator()), total=total_steps):
             images = np.array([d[0] for d in data])
-
-            _, _, im_h, im_w = images.shape
-            labels = list()
-            for d in data:
-                padding_label = np.zeros(
-                    (1, im_h, im_w)).astype('int64') + self.ignore_index
-                _, label_h, label_w = d[1].shape
-                padding_label[:, :label_h, :label_w] = d[1]
-                labels.append(padding_label)
-            labels = np.array(labels)
+            labels = np.array([d[1] for d in data])
 
             num_samples = images.shape[0]
             if num_samples < batch_size:
@@ -398,7 +389,7 @@ class DeepLabv3p(BaseAPI):
         batch_data = pool.map(transforms, images)
         pool.close()
         pool.join()
-        padding_batch = GenerateMiniBatch(batch_data)
+        padding_batch = generate_minibatch(batch_data)
         im = np.array(
             [data[0] for data in padding_batch],
             dtype=padding_batch[0][0].dtype)

+ 6 - 24
paddlex/cv/models/faster_rcnn.py

@@ -23,7 +23,7 @@ import paddlex
 import os.path as osp
 import copy
 from paddlex.cv.transforms import arrange_transforms
-from paddlex.cv.datasets import GenerateMiniBatch
+from paddlex.cv.datasets import generate_minibatch
 from .base import BaseAPI
 from collections import OrderedDict
 from .utils.detection_eval import eval_results, bbox2out
@@ -109,24 +109,6 @@ class FasterRCNN(BaseAPI):
                 freeze_at=2)
         return backbone
 
-    def _generate_mini_batch(self, batch_data):
-        if len(batch_data) == 1:
-            return batch_data
-        width = [data[0].shape[2] for data in batch_data]
-        height = [data[0].shape[1] for data in batch_data]
-        if len(set(width)) == 1 and len(set(height)) == 1:
-            return batch_data
-        max_shape = np.array([data[0].shape for data in batch_data]).max(
-            axis=0)
-        padding_batch = []
-        for data in batch_data:
-            im_c, im_h, im_w = data[0].shape[:]
-            padding_im = np.zeros(
-                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
-            padding_im[:, :im_h, :im_w] = data[0]
-            padding_batch.append((padding_im, ) + data[1:])
-        return padding_batch
-
     def build_net(self, mode='train'):
         train_pre_nms_top_n = 2000 if self.with_fpn else 12000
         test_pre_nms_top_n = 1000 if self.with_fpn else 6000
@@ -404,7 +386,7 @@ class FasterRCNN(BaseAPI):
         batch_data = pool.map(transforms, images)
         pool.close()
         pool.join()
-        padding_batch = GenerateMiniBatch(batch_data)
+        padding_batch = generate_minibatch(batch_data)
         im = np.array([data[0] for data in padding_batch])
         im_resize_info = np.array([data[1] for data in padding_batch])
         im_shape = np.array([data[2] for data in padding_batch])
@@ -461,10 +443,10 @@ class FasterRCNN(BaseAPI):
 
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
-            for k, v in zip(list(test_outputs_keys), results)
+            for k, v in zip(list(self.test_outputs.keys()), result)
         }
         res['im_id'] = (np.array(
-            [[i] for i in range(batch_size)]).astype('int32'), [])
+            [[i] for i in range(len(images))]).astype('int32'), [])
         preds = FasterRCNN._postprocess(res,
                                         len(images), self.num_classes,
                                         self.labels)
@@ -510,10 +492,10 @@ class FasterRCNN(BaseAPI):
 
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
-            for k, v in zip(list(test_outputs_keys), results)
+            for k, v in zip(list(self.test_outputs.keys()), result)
         }
         res['im_id'] = (np.array(
-            [[i] for i in range(batch_size)]).astype('int32'), [])
+            [[i] for i in range(len(img_file_list))]).astype('int32'), [])
         preds = FasterRCNN._postprocess(res,
                                         len(img_file_list), self.num_classes,
                                         self.labels)

+ 6 - 5
paddlex/cv/models/mask_rcnn.py

@@ -22,6 +22,7 @@ import paddlex.utils.logging as logging
 import paddlex
 import copy
 import os.path as osp
+from paddlex.cv.transforms import arrange_transforms
 from collections import OrderedDict
 from .faster_rcnn import FasterRCNN
 from .utils.detection_eval import eval_results, bbox2out, mask2out
@@ -396,10 +397,10 @@ class MaskRCNN(FasterRCNN):
 
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
-            for k, v in zip(list(test_outputs_keys), results)
+            for k, v in zip(list(self.test_outputs.keys()), result)
         }
         res['im_id'] = (np.array(
-            [[i] for i in range(batch_size)]).astype('int32'), [])
+            [[i] for i in range(len(images))]).astype('int32'), [])
         res['im_shape'] = (np.array(im_shape), [])
         preds = MaskRCNN._postprocess(res,
                                       len(images), self.num_classes,
@@ -446,12 +447,12 @@ class MaskRCNN(FasterRCNN):
 
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
-            for k, v in zip(list(test_outputs_keys), results)
+            for k, v in zip(list(self.test_outputs.keys()), result)
         }
         res['im_id'] = (np.array(
-            [[i] for i in range(batch_size)]).astype('int32'), [])
+            [[i] for i in range(len(img_file_list))]).astype('int32'), [])
         res['im_shape'] = (np.array(im_shape), [])
         preds = MaskRCNN._postprocess(res,
-                                      len(images), self.num_classes,
+                                      len(img_file_list), self.num_classes,
                                       self.mask_head_resolution, self.labels)
         return preds

+ 2 - 2
paddlex/cv/models/yolo_v3.py

@@ -23,7 +23,7 @@ import paddlex.utils.logging as logging
 import paddlex
 import copy
 from paddlex.cv.transforms import arrange_transforms
-from paddlex.cv.datasets import GenerateMiniBatch
+from paddlex.cv.datasets import generate_minibatch
 from .base import BaseAPI
 from collections import OrderedDict
 from .utils.detection_eval import eval_results, bbox2out
@@ -364,7 +364,7 @@ class YOLOv3(BaseAPI):
         batch_data = pool.map(transforms, images)
         pool.close()
         pool.join()
-        padding_batch = GenerateMiniBatch(batch_data)
+        padding_batch = generate_minibatch(batch_data)
         im = np.array(
             [data[0] for data in padding_batch],
             dtype=padding_batch[0][0].dtype)

+ 10 - 7
paddlex/cv/transforms/cls_transforms.py

@@ -68,13 +68,14 @@ class Compose(ClsTransform):
         if isinstance(im, np.ndarray):
             if len(im.shape) != 3:
                 raise Exception(
-                    "im should be 3-dimension, but now is {}-dimensions".format(
-                        len(im.shape)))
+                    "im should be 3-dimension, but now is {}-dimensions".
+                    format(len(im.shape)))
         else:
             try:
-                im = cv2.imread(im).astype('float32')
+                im = cv2.imread(im)
             except:
                 raise TypeError('Can\'t read The image file {}!'.format(im))
+        im = im.astype('float32')
         im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
         for op in self.transforms:
             if isinstance(op, ClsTransform):
@@ -139,8 +140,8 @@ class RandomCrop(ClsTransform):
             tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
                    当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
         """
-        im = random_crop(im, self.crop_size, self.lower_scale, self.lower_ratio,
-                         self.upper_ratio)
+        im = random_crop(im, self.crop_size, self.lower_scale,
+                         self.lower_ratio, self.upper_ratio)
         if label is None:
             return (im, )
         else:
@@ -270,12 +271,14 @@ class ResizeByShort(ClsTransform):
         im_short_size = min(im.shape[0], im.shape[1])
         im_long_size = max(im.shape[0], im.shape[1])
         scale = float(self.short_size) / im_short_size
-        if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
+        if self.max_size > 0 and np.round(scale *
+                                          im_long_size) > self.max_size:
             scale = float(self.max_size) / float(im_long_size)
         resized_width = int(round(im.shape[1] * scale))
         resized_height = int(round(im.shape[0] * scale))
         im = cv2.resize(
-            im, (resized_width, resized_height), interpolation=cv2.INTER_LINEAR)
+            im, (resized_width, resized_height),
+            interpolation=cv2.INTER_LINEAR)
 
         if label is None:
             return (im, )

+ 2 - 1
paddlex/cv/transforms/det_transforms.py

@@ -108,10 +108,11 @@ class Compose(DetTransform):
                 im = im_file
             else:
                 try:
-                    im = cv2.imread(im_file).astype('float32')
+                    im = cv2.imread(im_file)
                 except:
                     raise TypeError('Can\'t read The image file {}!'.format(
                         im_file))
+            im = im.astype('float32')
             im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
             # make default im_info with [h, w, 1]
             im_info['im_resize_info'] = np.array(

+ 4 - 2
paddlex/cv/transforms/seg_transforms.py

@@ -81,9 +81,10 @@ class Compose(SegTransform):
                     format(len(im.shape)))
         else:
             try:
-                im = cv2.imread(im).astype('float32')
+                im = cv2.imread(im)
             except:
                 raise ValueError('Can\'t read The image file {}!'.format(im))
+        im = im.astype('float32')
         if self.to_rgb:
             im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
         if label is not None:
@@ -399,7 +400,8 @@ class ResizeByShort(SegTransform):
         im_short_size = min(im.shape[0], im.shape[1])
         im_long_size = max(im.shape[0], im.shape[1])
         scale = float(self.short_size) / im_short_size
-        if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
+        if self.max_size > 0 and np.round(scale *
+                                          im_long_size) > self.max_size:
             scale = float(self.max_size) / float(im_long_size)
         resized_width = int(round(im.shape[1] * scale))
         resized_height = int(round(im.shape[0] * scale))

+ 1 - 1
paddlex/deploy.py

@@ -144,7 +144,7 @@ class Predictor:
                 res['im_info'] = im_resize_info
                 res['im_shape'] = im_shape
         elif self.model_type == "segmenter":
-            im, im_imfo = DeepLabv3p._preprocess(
+            im, im_info = DeepLabv3p._preprocess(
                 image,
                 self.transforms,
                 self.model_type,