
Merge branch 'develop_del_doc' into doc1

Jason 5 years ago
parent
current commit
6ba16d592c

+ 0 - 2
deploy/cpp/cmake/yaml-cpp.cmake

@@ -1,5 +1,3 @@
-find_package(Git REQUIRED)
-
 include(ExternalProject)
 
 message("${CMAKE_BUILD_TYPE}")

+ 2 - 1
deploy/cpp/demo/classifier.cpp

@@ -37,6 +37,7 @@ DEFINE_int32(batch_size, 1, "Batch size of infering");
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, true, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parsing command-line
@@ -58,7 +59,7 @@ int main(int argc, char** argv) {
              FLAGS_use_trt,
              FLAGS_gpu_id,
              FLAGS_key,
-             FLAGS_batch_size);
+             FLAGS_use_ir_optim);
 
   // Run prediction
   double total_running_time_s = 0.0;

+ 2 - 1
deploy/cpp/demo/detector.cpp

@@ -43,6 +43,7 @@ DEFINE_double(threshold,
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, true, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parse command-line arguments
@@ -63,7 +64,7 @@ int main(int argc, char** argv) {
              FLAGS_use_trt,
              FLAGS_gpu_id,
              FLAGS_key,
-             FLAGS_batch_size);
+             FLAGS_use_ir_optim);
 
   double total_running_time_s = 0.0;
   double total_imread_time_s = 0.0;

+ 2 - 1
deploy/cpp/demo/segmenter.cpp

@@ -39,6 +39,7 @@ DEFINE_int32(batch_size, 1, "Batch size of infering");
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, false, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parse command-line arguments
@@ -60,7 +61,7 @@ int main(int argc, char** argv) {
              FLAGS_use_trt,
              FLAGS_gpu_id,
              FLAGS_key,
-             FLAGS_batch_size);
+             FLAGS_use_ir_optim);
 
   double total_running_time_s = 0.0;
   double total_imread_time_s = 0.0;
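The same use_ir_optim flag is added to all three demos (defaulting to true for the classifier and detector, and to false for the segmenter) and is forwarded to Model::Init in place of the old batch-size argument. The sketch below is not part of this commit; it only shows how such a gflags boolean is set from the command line, with the binary name as a placeholder:

#include <gflags/gflags.h>
#include <iostream>

// Same definition as in the demos; only the default value differs per binary.
DEFINE_bool(use_ir_optim, true, "use ir optimization");

int main(int argc, char** argv) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  // e.g. ./classifier --use_ir_optim=false flips the switch at run time;
  // the parsed value is then passed as the last argument of model.Init(...).
  std::cout << "use_ir_optim = " << std::boolalpha << FLAGS_use_ir_optim << std::endl;
  return 0;
}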

+ 4 - 4
deploy/cpp/include/paddlex/paddlex.h

@@ -72,15 +72,15 @@ class Model {
    * @param use_trt: use Tensor RT or not when infering
    * @param gpu_id: the id of gpu when infering with using gpu
    * @param key: the key of encryption when using encrypted model
-   * @param batch_size: batch size of infering
+   * @param use_ir_optim: use ir optimization when infering
    * */
   void Init(const std::string& model_dir,
             bool use_gpu = false,
             bool use_trt = false,
             int gpu_id = 0,
             std::string key = "",
-            int batch_size = 1) {
-    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, batch_size);
+            bool use_ir_optim = true) {
+    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, use_ir_optim);
   }
 
   void create_predictor(const std::string& model_dir,
@@ -88,7 +88,7 @@ class Model {
                         bool use_trt = false,
                         int gpu_id = 0,
                         std::string key = "",
-                        int batch_size = 1);
+                        bool use_ir_optim = true);
 
   /*
    * @brief
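Since the new parameter defaults to true, callers of Init that stop at an earlier argument keep compiling and keep IR optimization enabled; disabling it means spelling out all six arguments. A minimal sketch, not part of this commit (the PaddleX namespace, the include path, and the model path are assumptions):

#include "include/paddlex/paddlex.h"

int main() {
  PaddleX::Model model;
  // Short call: unchanged behaviour, IR optimization stays on via the default.
  model.Init("/path/to/inference_model", /*use_gpu=*/true);
  // Full call: the trailing bool now disables the IR passes for this predictor.
  model.Init("/path/to/inference_model", true, false, 0, "", /*use_ir_optim=*/false);
  return 0;
}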

+ 6 - 1
deploy/cpp/include/paddlex/visualize.h

@@ -22,9 +22,14 @@
 #include <io.h>
 #else  // Linux/Unix
 #include <dirent.h>
-#include <sys/io.h>
+// #include <sys/io.h>
+#ifdef __arm__  // for arm
+#include <aarch64-linux-gnu/sys/stat.h>
+#include <aarch64-linux-gnu/sys/types.h>
+#else
 #include <sys/stat.h>
 #include <sys/types.h>
+#endif
 #include <unistd.h>
 #endif
 #include <string>

+ 10 - 0
deploy/cpp/scripts/jetson_bootstrap.sh

@@ -0,0 +1,10 @@
+# download pre-compiled opencv lib
+OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/tools/opencv3_aarch.tgz
+if [ ! -d "./deps/opencv3" ]; then
+    mkdir -p deps
+    cd deps
+    wget -c ${OPENCV_URL}
+    tar xvfz opencv3_aarch.tgz
+    rm -rf opencv3_aarch.tgz
+    cd ..
+fi

+ 42 - 0
deploy/cpp/scripts/jetson_build.sh

@@ -0,0 +1,42 @@
+# Whether to use GPU (i.e. whether to use CUDA)
+WITH_GPU=OFF
+# Use MKL or openblas
+WITH_MKL=OFF
+# Whether to integrate TensorRT (only effective when WITH_GPU=ON)
+WITH_TENSORRT=OFF
+# Path to TensorRT; if you need to integrate TensorRT, change this to your actual TensorRT install path
+TENSORRT_DIR=/root/projects/TensorRT/
+# Path to the Paddle inference library; change this to your actual install path
+PADDLE_DIR=/root/projects/fluid_inference
+# Whether to link the Paddle inference library statically
+# When using TensorRT, the Paddle inference library is usually a dynamic library
+WITH_STATIC_LIB=OFF
+# Path to the CUDA lib directory
+CUDA_LIB=/usr/local/cuda/lib64
+# Path to the CUDNN lib directory
+CUDNN_LIB=/usr/local/cuda/lib64
+
+# Whether to load an encrypted model
+WITH_ENCRYPTION=OFF
+
+# OPENCV path; no change needed if the bundled pre-compiled version is used
+sh $(pwd)/scripts/jetson_bootstrap.sh  # download the pre-compiled opencv
+OPENCV_DIR=$(pwd)/deps/opencv3
+
+# No changes needed below this line
+rm -rf build
+mkdir -p build
+cd build
+cmake .. \
+    -DWITH_GPU=${WITH_GPU} \
+    -DWITH_MKL=${WITH_MKL} \
+    -DWITH_TENSORRT=${WITH_TENSORRT} \
+    -DWITH_ENCRYPTION=${WITH_ENCRYPTION} \
+    -DTENSORRT_DIR=${TENSORRT_DIR} \
+    -DPADDLE_DIR=${PADDLE_DIR} \
+    -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
+    -DCUDA_LIB=${CUDA_LIB} \
+    -DCUDNN_LIB=${CUDNN_LIB} \
+    -DENCRYPTION_DIR=${ENCRYPTION_DIR} \
+    -DOPENCV_DIR=${OPENCV_DIR}
+make

+ 6 - 2
deploy/cpp/src/paddlex.cpp

@@ -23,7 +23,7 @@ void Model::create_predictor(const std::string& model_dir,
                              bool use_trt,
                              int gpu_id,
                              std::string key,
-                             int batch_size) {
+                             bool use_ir_optim) {
   paddle::AnalysisConfig config;
   std::string model_file = model_dir + OS_PATH_SEP + "__model__";
   std::string params_file = model_dir + OS_PATH_SEP + "__params__";
@@ -64,6 +64,8 @@ void Model::create_predictor(const std::string& model_dir,
   }
   config.SwitchUseFeedFetchOps(false);
   config.SwitchSpecifyInputNames(true);
+  // Enable IR graph optimization
+  config.SwitchIrOptim(use_ir_optim);
   // Enable memory optimization
   config.EnableMemoryOptim();
   if (use_trt) {
@@ -76,7 +78,6 @@ void Model::create_predictor(const std::string& model_dir,
         false /* use_calib_mode*/);
   }
   predictor_ = std::move(CreatePaddlePredictor(config));
-  inputs_batch_.assign(batch_size, ImageBlob());
 }
 
 bool Model::load_config(const std::string& yaml_input) {
@@ -192,6 +193,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
                  "to function predict()!" << std::endl;
     return false;
   }
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
   // Preprocess input images
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
@@ -356,6 +358,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
     return false;
   }
 
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
   int batch_size = im_batch.size();
   // Preprocess input images
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
@@ -637,6 +640,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   }
 
   // Preprocess input images
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
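With the batch_size parameter removed from create_predictor, inputs_batch_ is now resized inside each batch predict() call, so the effective batch size is simply the number of images the caller passes in. A hypothetical caller-side sketch, not part of this commit (file names are placeholders):

#include <opencv2/opencv.hpp>
#include <vector>

int main() {
  // Build a batch of any length; nothing about its size is fixed at Init() time.
  std::vector<cv::Mat> im_batch;
  const char* paths[] = {"img_0001.jpg", "img_0002.jpg", "img_0003.jpg"};
  for (const char* p : paths) {
    im_batch.push_back(cv::imread(p));
  }
  // Inside predict(), the preprocessing blobs now track this size:
  //   inputs_batch_.assign(im_batch.size(), ImageBlob());
  return 0;
}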

+ 5 - 5
paddlex/cv/transforms/visualize.py

@@ -73,7 +73,7 @@ def cls_compose(im, label=None, transforms=None, vdl_writer=None, step=0):
                 raise TypeError('Can\'t read The image file {}!'.format(im))
         im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
         if vdl_writer is not None:
-            vdl_writer.add_image(tag='0. OriginalImange/' +  str(step),
+            vdl_writer.add_image(tag='0. OriginalImage/' +  str(step),
                                  img=im,
                                  step=0)
         op_id = 1
@@ -148,7 +148,7 @@ def det_compose(im, im_info=None, label_info=None, transforms=None, vdl_writer=N
         if len(outputs) == 3:
             label_info = outputs[2]
         if vdl_writer is not None:
-            vdl_writer.add_image(tag='0. OriginalImange/' +  str(step),
+            vdl_writer.add_image(tag='0. OriginalImage/' +  str(step),
                                  img=im,
                                  step=0)
         op_id = 1
@@ -209,7 +209,7 @@ def det_compose(im, im_info=None, label_info=None, transforms=None, vdl_writer=N
             if vdl_writer is not None:
                 tag = str(op_id) + '. ' + op.__class__.__name__ + '/' +  str(step)
                 if op is None:
-                    tag = str(op_id) + '. OriginalImangeWithGTBox/' +  str(step)
+                    tag = str(op_id) + '. OriginalImageWithGTBox/' +  str(step)
                 vdl_writer.add_image(tag=tag,
                                      img=vdl_im,
                                      step=0)
@@ -233,7 +233,7 @@ def seg_compose(im, im_info=None, label=None, transforms=None, vdl_writer=None,
         if not isinstance(label, np.ndarray):
             label = np.asarray(Image.open(label))
     if vdl_writer is not None:
-        vdl_writer.add_image(tag='0. OriginalImange' + '/' +  str(step),
+        vdl_writer.add_image(tag='0. OriginalImage' + '/' +  str(step),
                              img=im,
                              step=0)
     op_id = 1
@@ -303,4 +303,4 @@ def visualize(dataset, img_count=3, save_dir='vdl_output'):
             seg_compose(*data)
         else:
             raise Exception('The transform must the subclass of \
-                    ClsTransform or DetTransform or SegTransform!')
+                    ClsTransform or DetTransform or SegTransform!')