
Merge branch 'develop_del_doc' into doc1

Jason 5 years ago
parent
current commit
6ba16d592c

+ 0 - 2
deploy/cpp/cmake/yaml-cpp.cmake

@@ -1,5 +1,3 @@
-find_package(Git REQUIRED)
-
 include(ExternalProject)
 
 message("${CMAKE_BUILD_TYPE}")

+ 2 - 1
deploy/cpp/demo/classifier.cpp

@@ -37,6 +37,7 @@ DEFINE_int32(batch_size, 1, "Batch size of infering");
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, true, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parsing command-line
@@ -58,7 +59,7 @@ int main(int argc, char** argv) {
              FLAGS_use_trt,
              FLAGS_gpu_id,
              FLAGS_key,
-             FLAGS_batch_size);
+             FLAGS_use_ir_optim);
 
   // Run prediction
   double total_running_time_s = 0.0;

+ 2 - 1
deploy/cpp/demo/detector.cpp

@@ -43,6 +43,7 @@ DEFINE_double(threshold,
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, true, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parse command-line arguments
@@ -63,7 +64,7 @@ int main(int argc, char** argv) {
              FLAGS_use_trt,
              FLAGS_gpu_id,
              FLAGS_key,
-             FLAGS_batch_size);
+             FLAGS_use_ir_optim);
 
   double total_running_time_s = 0.0;
   double total_imread_time_s = 0.0;

+ 2 - 1
deploy/cpp/demo/segmenter.cpp

@@ -39,6 +39,7 @@ DEFINE_int32(batch_size, 1, "Batch size of infering");
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, false, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parse command-line arguments
@@ -60,7 +61,7 @@ int main(int argc, char** argv) {
              FLAGS_use_trt,
              FLAGS_gpu_id,
              FLAGS_key,
-             FLAGS_batch_size);
+             FLAGS_use_ir_optim);
 
   double total_running_time_s = 0.0;
   double total_imread_time_s = 0.0;
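The same use_ir_optim flag is added to all three demos (defaulting to true for the classifier and detector, and to false for the segmenter) and is forwarded to Model::Init in place of the old batch-size argument. The sketch below is not part of this commit; it only shows how such a gflags boolean is set from the command line, with the binary name as a placeholder:

#include <gflags/gflags.h>
#include <iostream>

// Same definition as in the demos; only the default value differs per binary.
DEFINE_bool(use_ir_optim, true, "use ir optimization");

int main(int argc, char** argv) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  // e.g. ./classifier --use_ir_optim=false flips the switch at run time;
  // the parsed value is then passed as the last argument of model.Init(...).
  std::cout << "use_ir_optim = " << std::boolalpha << FLAGS_use_ir_optim << std::endl;
  return 0;
}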

+ 4 - 4
deploy/cpp/include/paddlex/paddlex.h

@@ -72,15 +72,15 @@ class Model {
    * @param use_trt: use Tensor RT or not when infering
    * @param gpu_id: the id of gpu when infering with using gpu
    * @param key: the key of encryption when using encrypted model
-   * @param batch_size: batch size of infering
+   * @param use_ir_optim: use ir optimization when infering
    * */
   void Init(const std::string& model_dir,
             bool use_gpu = false,
             bool use_trt = false,
             int gpu_id = 0,
             std::string key = "",
-            int batch_size = 1) {
-    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, batch_size);
+            bool use_ir_optim = true) {
+    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, use_ir_optim);
   }
 
   void create_predictor(const std::string& model_dir,
@@ -88,7 +88,7 @@ class Model {
                         bool use_trt = false,
                         int gpu_id = 0,
                         std::string key = "",
-                        int batch_size = 1);
+                        bool use_ir_optim = true);
 
   /*
    * @brief
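Since the new parameter defaults to true, callers of Init that stop at an earlier argument keep compiling and keep IR optimization enabled; disabling it means spelling out all six arguments. A minimal sketch, not part of this commit (the PaddleX namespace, the include path, and the model path are assumptions):

#include "include/paddlex/paddlex.h"

int main() {
  PaddleX::Model model;
  // Short call: unchanged behaviour, IR optimization stays on via the default.
  model.Init("/path/to/inference_model", /*use_gpu=*/true);
  // Full call: the trailing bool now disables the IR passes for this predictor.
  model.Init("/path/to/inference_model", true, false, 0, "", /*use_ir_optim=*/false);
  return 0;
}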

+ 6 - 1
deploy/cpp/include/paddlex/visualize.h

@@ -22,9 +22,14 @@
 #include <io.h>
 #else  // Linux/Unix
 #include <dirent.h>
-#include <sys/io.h>
+// #include <sys/io.h>
+#ifdef __arm__  // for arm
+#include <aarch64-linux-gnu/sys/stat.h>
+#include <aarch64-linux-gnu/sys/types.h>
+#else
 #include <sys/stat.h>
 #include <sys/types.h>
+#endif
 #include <unistd.h>
 #endif
 #include <string>

+ 10 - 0
deploy/cpp/scripts/jetson_bootstrap.sh

@@ -0,0 +1,10 @@
+# download pre-compiled opencv lib
+OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/tools/opencv3_aarch.tgz
+if [ ! -d "./deps/opencv3" ]; then
+    mkdir -p deps
+    cd deps
+    wget -c ${OPENCV_URL}
+    tar xvfz opencv3_aarch.tgz
+    rm -rf opencv3_aarch.tgz
+    cd ..
+fi

+ 42 - 0
deploy/cpp/scripts/jetson_build.sh

@@ -0,0 +1,42 @@
+# Whether to use GPU (i.e. whether to use CUDA)
+WITH_GPU=OFF
+# Use MKL or openblas
+WITH_MKL=OFF
+# Whether to integrate TensorRT (only effective when WITH_GPU=ON)
+WITH_TENSORRT=OFF
+# Path to TensorRT; if you need to integrate TensorRT, change this to your actual TensorRT install path
+TENSORRT_DIR=/root/projects/TensorRT/
+# Path to the Paddle inference library; change this to your actual install path
+PADDLE_DIR=/root/projects/fluid_inference
+# Whether to link the Paddle inference library statically
+# When using TensorRT, the Paddle inference library is usually a dynamic library
+WITH_STATIC_LIB=OFF
+# Path to the CUDA lib directory
+CUDA_LIB=/usr/local/cuda/lib64
+# Path to the CUDNN lib directory
+CUDNN_LIB=/usr/local/cuda/lib64
+
+# Whether to load an encrypted model
+WITH_ENCRYPTION=OFF
+
+# OPENCV path; no change needed if the bundled pre-compiled version is used
+sh $(pwd)/scripts/jetson_bootstrap.sh  # download the pre-compiled opencv
+OPENCV_DIR=$(pwd)/deps/opencv3
+
+# No changes needed below this line
+rm -rf build
+mkdir -p build
+cd build
+cmake .. \
+    -DWITH_GPU=${WITH_GPU} \
+    -DWITH_MKL=${WITH_MKL} \
+    -DWITH_TENSORRT=${WITH_TENSORRT} \
+    -DWITH_ENCRYPTION=${WITH_ENCRYPTION} \
+    -DTENSORRT_DIR=${TENSORRT_DIR} \
+    -DPADDLE_DIR=${PADDLE_DIR} \
+    -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
+    -DCUDA_LIB=${CUDA_LIB} \
+    -DCUDNN_LIB=${CUDNN_LIB} \
+    -DENCRYPTION_DIR=${ENCRYPTION_DIR} \
+    -DOPENCV_DIR=${OPENCV_DIR}
+make

+ 6 - 2
deploy/cpp/src/paddlex.cpp

@@ -23,7 +23,7 @@ void Model::create_predictor(const std::string& model_dir,
                              bool use_trt,
                              int gpu_id,
                              std::string key,
-                             int batch_size) {
+                             bool use_ir_optim) {
   paddle::AnalysisConfig config;
   std::string model_file = model_dir + OS_PATH_SEP + "__model__";
   std::string params_file = model_dir + OS_PATH_SEP + "__params__";
@@ -64,6 +64,8 @@ void Model::create_predictor(const std::string& model_dir,
   }
   config.SwitchUseFeedFetchOps(false);
   config.SwitchSpecifyInputNames(true);
+  // Enable IR graph optimization
+  config.SwitchIrOptim(use_ir_optim);
   // Enable memory optimization
   config.EnableMemoryOptim();
   if (use_trt) {
@@ -76,7 +78,6 @@ void Model::create_predictor(const std::string& model_dir,
         false /* use_calib_mode*/);
   }
   predictor_ = std::move(CreatePaddlePredictor(config));
-  inputs_batch_.assign(batch_size, ImageBlob());
 }
 
 bool Model::load_config(const std::string& yaml_input) {
@@ -192,6 +193,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
                  "to function predict()!" << std::endl;
     return false;
   }
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
   // Preprocess input images
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
@@ -356,6 +358,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
     return false;
   }
 
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
   int batch_size = im_batch.size();
   // Preprocess input images
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
@@ -637,6 +640,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   }
 
   // Preprocess input images
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
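With the batch_size parameter removed from create_predictor, inputs_batch_ is now resized inside each batch predict() call, so the effective batch size is simply the number of images the caller passes in. A hypothetical caller-side sketch, not part of this commit (file names are placeholders):

#include <opencv2/opencv.hpp>
#include <vector>

int main() {
  // Build a batch of any length; nothing about its size is fixed at Init() time.
  std::vector<cv::Mat> im_batch;
  const char* paths[] = {"img_0001.jpg", "img_0002.jpg", "img_0003.jpg"};
  for (const char* p : paths) {
    im_batch.push_back(cv::imread(p));
  }
  // Inside predict(), the preprocessing blobs now track this size:
  //   inputs_batch_.assign(im_batch.size(), ImageBlob());
  return 0;
}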

+ 5 - 5
paddlex/cv/transforms/visualize.py

@@ -73,7 +73,7 @@ def cls_compose(im, label=None, transforms=None, vdl_writer=None, step=0):
                 raise TypeError('Can\'t read The image file {}!'.format(im))
         im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
         if vdl_writer is not None:
-            vdl_writer.add_image(tag='0. OriginalImange/' +  str(step),
+            vdl_writer.add_image(tag='0. OriginalImage/' +  str(step),
                                  img=im,
                                  step=0)
         op_id = 1
@@ -148,7 +148,7 @@ def det_compose(im, im_info=None, label_info=None, transforms=None, vdl_writer=N
         if len(outputs) == 3:
             label_info = outputs[2]
         if vdl_writer is not None:
-            vdl_writer.add_image(tag='0. OriginalImange/' +  str(step),
+            vdl_writer.add_image(tag='0. OriginalImage/' +  str(step),
                                  img=im,
                                  step=0)
         op_id = 1
@@ -209,7 +209,7 @@ def det_compose(im, im_info=None, label_info=None, transforms=None, vdl_writer=N
             if vdl_writer is not None:
                 tag = str(op_id) + '. ' + op.__class__.__name__ + '/' +  str(step)
                 if op is None:
-                    tag = str(op_id) + '. OriginalImangeWithGTBox/' +  str(step)
+                    tag = str(op_id) + '. OriginalImageWithGTBox/' +  str(step)
                 vdl_writer.add_image(tag=tag,
                                      img=vdl_im,
                                      step=0)
@@ -233,7 +233,7 @@ def seg_compose(im, im_info=None, label=None, transforms=None, vdl_writer=None,
         if not isinstance(label, np.ndarray):
             label = np.asarray(Image.open(label))
     if vdl_writer is not None:
-        vdl_writer.add_image(tag='0. OriginalImange' + '/' +  str(step),
+        vdl_writer.add_image(tag='0. OriginalImage' + '/' +  str(step),
                              img=im,
                              step=0)
     op_id = 1
@@ -303,4 +303,4 @@ def visualize(dataset, img_count=3, save_dir='vdl_output'):
             seg_compose(*data)
         else:
             raise Exception('The transform must the subclass of \
-                    ClsTransform or DetTransform or SegTransform!')
+                    ClsTransform or DetTransform or SegTransform!')