
support tensorrt and modify docs (#790)

* fix uint8_t error

* adjust demo and engine config

* fix namespace

* modify demo 'det' to 'clas'

* delete infer demo

* fix format

* fix glog error

* fix onnx script

* tensorrt add dynamic shape config

* support seg2.1 model

* fix tensorrt config

* add tensorrt demo

* modify tensorrt init

* CMakeLists add tensorrt demo

* CMakeLists add windows tensorrt

* modify docs

* modify docs
heliqi 4 years ago
parent
commit
3415ec759a
33 changed files with 725 additions and 269 deletions
  1. dygraph/deploy/cpp/CMakeLists.txt (+26 -2)
  2. dygraph/deploy/cpp/CMakeSettings.json (+5 -0)
  3. dygraph/deploy/cpp/README.md (+3 -3)
  4. dygraph/deploy/cpp/demo/batch_infer.cpp (+92 -0)
  5. dygraph/deploy/cpp/demo/model_infer.cpp (+16 -62)
  6. dygraph/deploy/cpp/demo/multi_gpu_model_infer.cpp (+9 -8)
  7. dygraph/deploy/cpp/demo/onnx_tensorrt/CMakeLists.txt (+1 -1)
  8. dygraph/deploy/cpp/demo/onnx_tensorrt/model_infer.cpp (+8 -6)
  9. dygraph/deploy/cpp/demo/onnx_triton/CMakeLists.txt (+1 -1)
  10. dygraph/deploy/cpp/demo/onnx_triton/model_infer.cpp (+6 -2)
  11. dygraph/deploy/cpp/demo/tensorrt_infer.cpp (+78 -0)
  12. dygraph/deploy/cpp/docs/apis/model.md (+146 -32)
  13. dygraph/deploy/cpp/docs/compile/paddle/linux.md (+24 -25)
  14. dygraph/deploy/cpp/docs/compile/paddle/windows.md (+13 -15)
  15. dygraph/deploy/cpp/docs/demo/model_infer.md (+11 -15)
  16. dygraph/deploy/cpp/docs/demo/multi_gpu_model_infer.md (+2 -2)
  17. dygraph/deploy/cpp/docs/demo/tensorrt_infer.md (+150 -0)
  18. dygraph/deploy/cpp/docs/images/tensorrt.png (BIN)
  19. dygraph/deploy/cpp/docs/models/paddleclas.md (+3 -2)
  20. dygraph/deploy/cpp/docs/models/paddledetection.md (+19 -17)
  21. dygraph/deploy/cpp/docs/models/paddleseg.md (+6 -8)
  22. dygraph/deploy/cpp/model_deploy/common/include/base_model.h (+11 -18)
  23. dygraph/deploy/cpp/model_deploy/common/include/multi_gpu_model.h (+5 -6)
  24. dygraph/deploy/cpp/model_deploy/common/include/paddle_deploy.h (+1 -0)
  25. dygraph/deploy/cpp/model_deploy/engine/include/engine_config.h (+21 -4)
  26. dygraph/deploy/cpp/model_deploy/engine/include/tensorrt_engine.h (+1 -0)
  27. dygraph/deploy/cpp/model_deploy/engine/src/ppinference_engine.cpp (+10 -11)
  28. dygraph/deploy/cpp/model_deploy/engine/src/tensorrt_engine.cpp (+16 -19)
  29. dygraph/deploy/cpp/model_deploy/engine/src/triton_engine.cpp (+2 -8)
  30. dygraph/deploy/cpp/model_deploy/ppseg/include/seg_postprocess.h (+5 -0)
  31. dygraph/deploy/cpp/model_deploy/ppseg/src/seg_postprocess.cpp (+32 -0)
  32. dygraph/deploy/cpp/scripts/tensorrt_build.sh (+1 -1)
  33. dygraph/deploy/cpp/scripts/triton_build.sh (+1 -1)

+ 26 - 2
dygraph/deploy/cpp/CMakeLists.txt

@@ -133,6 +133,9 @@ if(WITH_GPU)
       message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn/")
     endif()
 
+    set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
+    set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
+
     if (WITH_TENSORRT)
       include_directories("${TENSORRT_DIR}/include")
       link_directories("${TENSORRT_DIR}/lib")
@@ -140,12 +143,20 @@ if(WITH_GPU)
       set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
       set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
     endif()
-    set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
-    set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
+    
   else()
     set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
     set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
     set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
+
+    if (WITH_TENSORRT)
+      include_directories("${TENSORRT_DIR}/include")
+      link_directories("${TENSORRT_DIR}/lib")
+
+      set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/myelin64_1${CMAKE_STATIC_LIBRARY_SUFFIX})
+    endif()
   endif()
 endif()
 
@@ -202,15 +213,28 @@ add_executable(model_infer ${PROJECT_ROOT_DIR}/demo/model_infer.cpp ${SRC} ${ENG
 ADD_DEPENDENCIES(model_infer ext-yaml-cpp)
 target_link_libraries(model_infer ${DEPS})
 
+add_executable(batch_infer ${PROJECT_ROOT_DIR}/demo/batch_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC})
+ADD_DEPENDENCIES(batch_infer ext-yaml-cpp)
+target_link_libraries(batch_infer ${DEPS})
+
 add_executable(multi_gpu_model_infer ${PROJECT_ROOT_DIR}/demo/multi_gpu_model_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC})
 ADD_DEPENDENCIES(multi_gpu_model_infer ext-yaml-cpp)
 target_link_libraries(multi_gpu_model_infer ${DEPS})
 
+if (WITH_TENSORRT)
+  add_executable(tensorrt_infer ${PROJECT_ROOT_DIR}/demo/tensorrt_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC})
+  ADD_DEPENDENCIES(tensorrt_infer ext-yaml-cpp)
+  target_link_libraries(tensorrt_infer ${DEPS})
+endif()
+
 if(WIN32)
   add_custom_command(TARGET model_infer POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ${CMAKE_BINARY_DIR}/paddle_deploy
     COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/paddle_deploy
     COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll  ${CMAKE_BINARY_DIR}/paddle_deploy
     COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_DIR}/paddle/lib/paddle_inference.dll ${CMAKE_BINARY_DIR}/paddle_deploy
+    COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_DIR}/lib/nvinfer.dll ${CMAKE_BINARY_DIR}/paddle_deploy
+    COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_DIR}/lib/nvinfer_plugin.dll ${CMAKE_BINARY_DIR}/paddle_deploy
+    COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_DIR}/lib/myelin64_1.dll ${CMAKE_BINARY_DIR}/paddle_deploy
   )
 endif()

+ 5 - 0
dygraph/deploy/cpp/CMakeSettings.json

@@ -29,6 +29,11 @@
           "type": "PATH"
         },
         {
+          "name": "TENSORRT_DIR",
+          "value": "path\\to\\TensorRT_dir",
+          "type": "PATH"
+        },
+        {
           "name": "WITH_STATIC_LIB",
           "value": "True",
           "type": "BOOL"

+ 3 - 3
dygraph/deploy/cpp/README.md

@@ -3,10 +3,10 @@
 本目录下代码,目前支持以下飞桨官方套件基于PaddleInference的部署。
 
 ## 模型套件支持
-- PaddleDetection([release/2.0](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0))
-- PaddleSeg([release/2.0](https://github.com/PaddlePaddle/PaddleSeg/tree/release/v2.0))
+- PaddleDetection([release/2.1](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1))
+- PaddleSeg([release/2.1](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1))
 - PaddleClas([release/2.1](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1))
-- PaddleX([release/2.0-rc](https://github.com/PaddlePaddle/PaddleX/tree/release/2.0-rc))
+- PaddleX([release/2.0-rc](https://github.com/PaddlePaddle/PaddleX))
 
 ## 硬件支持
 - CPU(linux/windows)

+ 92 - 0
dygraph/deploy/cpp/demo/batch_infer.cpp

@@ -0,0 +1,92 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gflags/gflags.h>
+#include <omp.h>
+#include <memory>
+#include <string>
+#include <fstream>
+
+#include "model_deploy/common/include/paddle_deploy.h"
+
+DEFINE_string(model_filename, "", "Path of det inference model");
+DEFINE_string(params_filename, "", "Path of det inference params");
+DEFINE_string(cfg_file, "", "Path of yaml file");
+DEFINE_string(model_type, "", "model type");
+DEFINE_string(image_list, "", "Path of test image file");
+DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
+DEFINE_int32(gpu_id, 0, "GPU card id");
+DEFINE_bool(use_trt, false, "Infering with TensorRT");
+
+int main(int argc, char** argv) {
+  // Parsing command-line
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  // create model
+  std::shared_ptr<PaddleDeploy::Model> model =
+        PaddleDeploy::CreateModel(FLAGS_model_type);
+
+  // model init
+  model->Init(FLAGS_cfg_file);
+
+  // inference engine init
+  PaddleDeploy::PaddleEngineConfig engine_config;
+  engine_config.model_filename = FLAGS_model_filename;
+  engine_config.params_filename = FLAGS_params_filename;
+  engine_config.use_gpu = FLAGS_use_gpu;
+  engine_config.gpu_id = FLAGS_gpu_id;
+  engine_config.use_trt = FLAGS_use_trt;
+  if (FLAGS_use_trt) {
+    engine_config.precision = 0;
+  }
+  model->PaddleEngineInit(engine_config);
+
+  // Mini-batch
+  std::vector<std::string> image_paths;
+  if (FLAGS_image_list != "") {
+    std::ifstream inf(FLAGS_image_list);
+    if (!inf) {
+      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
+      return -1;
+    }
+    std::string image_path;
+    while (getline(inf, image_path)) {
+      image_paths.push_back(image_path);
+    }
+  }
+
+  // infer
+  std::vector<PaddleDeploy::Result> results;
+  for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
+    // Read image
+    int im_vec_size =
+        std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
+    std::vector<cv::Mat> im_vec(im_vec_size - i);
+    #pragma omp parallel for num_threads(im_vec_size - i)
+    for (int j = i; j < im_vec_size; ++j) {
+      im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
+    }
+
+    model->Predict(im_vec, &results);
+
+    std::cout << i / FLAGS_batch_size << " group -----" << std::endl;
+    for (auto j = 0; j < results.size(); ++j) {
+      std::cout << "Result for sample " << j << std::endl;
+      std::cout << results[j] << std::endl;
+    }
+  }
+
+  return 0;
+}

+ 16 - 62
dygraph/deploy/cpp/demo/model_infer.cpp

@@ -12,11 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <glog/logging.h>
-#include <omp.h>
-#include <memory>
+#include <gflags/gflags.h>
 #include <string>
-#include <fstream>
+#include <vector>
 
 #include "model_deploy/common/include/paddle_deploy.h"
 
@@ -25,81 +23,37 @@ DEFINE_string(params_filename, "", "Path of det inference params");
 DEFINE_string(cfg_file, "", "Path of yaml file");
 DEFINE_string(model_type, "", "model type");
 DEFINE_string(image, "", "Path of test image file");
-DEFINE_string(image_list, "", "Path of test image file");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_int32(gpu_id, 0, "GPU card id");
-DEFINE_bool(use_mkl, true, "Infering with mkl");
-DEFINE_int32(batch_size, 1, "Batch size of infering");
-DEFINE_int32(thread_num, 1, "thread num of preprocessing");
-DEFINE_int32(mkl_thread_num, 8, "thread num of mkldnn");
 
 int main(int argc, char** argv) {
   // Parsing command-line
   google::ParseCommandLineFlags(&argc, &argv, true);
-  std::cout << "ParseCommandLineFlags:FLAGS_model_type="
-            << FLAGS_model_type << " model_filename="
-            << FLAGS_model_filename << std::endl;
 
   // create model
   std::shared_ptr<PaddleDeploy::Model> model =
         PaddleDeploy::CreateModel(FLAGS_model_type);
-  if (!model) {
-    std::cout << "no model_type: " << FLAGS_model_type
-              << "  model=" << model << std::endl;
-    return 0;
-  }
-  std::cout << "start model init " << std::endl;
 
   // model init
   model->Init(FLAGS_cfg_file);
-  std::cout << "start engine init " << std::endl;
 
-  // inference engine in
-  model->PaddleEngineInit(FLAGS_model_filename,
-                          FLAGS_params_filename,
-                          FLAGS_use_gpu,
-                          FLAGS_gpu_id,
-                          FLAGS_use_mkl,
-                          FLAGS_mkl_thread_num);
+  // inference engine init
+  PaddleDeploy::PaddleEngineConfig engine_config;
+  engine_config.model_filename = FLAGS_model_filename;
+  engine_config.params_filename = FLAGS_params_filename;
+  engine_config.use_gpu = FLAGS_use_gpu;
+  engine_config.gpu_id = FLAGS_gpu_id;
+  model->PaddleEngineInit(engine_config);
 
-  // Mini-batch
-  std::vector<std::string> image_paths;
-  if (FLAGS_image_list != "") {
-    std::ifstream inf(FLAGS_image_list);
-    if (!inf) {
-      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
-      return -1;
-    }
-    std::string image_path;
-    while (getline(inf, image_path)) {
-      image_paths.push_back(image_path);
-    }
-  } else if (FLAGS_image != "") {
-    image_paths.push_back(FLAGS_image);
-  } else {
-    std::cerr << "image_list or image should be defined" << std::endl;
-    return -1;
-  }
+  // prepare data
+  std::vector<cv::Mat> imgs;
+  imgs.push_back(std::move(cv::imread(FLAGS_image)));
 
-  std::cout << "start model predict " << image_paths.size() << std::endl;
-  // infer
+  // predict
   std::vector<PaddleDeploy::Result> results;
-  for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
-    // Read image
-    int im_vec_size =
-        std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
-    std::vector<cv::Mat> im_vec(im_vec_size - i);
-    #pragma omp parallel for num_threads(im_vec_size - i)
-    for (int j = i; j < im_vec_size; ++j) {
-      im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
-    }
+  model->Predict(imgs, &results, 1);
+
+  std::cout << results[0] << std::endl;
 
-    model->Predict(im_vec, &results, FLAGS_thread_num);
-    std::cout << i / FLAGS_batch_size << " group" << std::endl;
-    for (auto j = 0; j < results.size(); ++j) {
-      std::cout << "Result for sample " << j << std::endl;
-      std::cout << results[j] << std::endl;
-    }
-  }
   return 0;
 }

+ 9 - 8
dygraph/deploy/cpp/demo/multi_gpu_model_infer.cpp

@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <glog/logging.h>
+#include <gflags/gflags.h>
 #include <omp.h>
 #include <memory>
 #include <string>
@@ -30,11 +30,7 @@ DEFINE_int32(batch_size, 1, "Batch size of infering");
 DEFINE_int32(thread_num, 1, "thread num of preprocessing");
 
 int main(int argc, char** argv) {
-  // Parsing command-line
   google::ParseCommandLineFlags(&argc, &argv, true);
-  std::cout << "ParseCommandLineFlags:FLAGS_model_type="
-            << FLAGS_model_type << " model_filename="
-            << FLAGS_model_filename << std::endl;
 
   std::vector<int> gpu_ids;
   std::stringstream gpu_ids_str(FLAGS_gpu_id);
@@ -54,11 +50,16 @@ int main(int argc, char** argv) {
     return -1;
   }
 
-  if (!model.PaddleEngineInit(FLAGS_model_filename,
-                              FLAGS_params_filename,
-                              gpu_ids)) {
+  // engine init
+  PaddleDeploy::PaddleEngineConfig engine_config;
+  engine_config.model_filename = FLAGS_model_filename;
+  engine_config.params_filename = FLAGS_params_filename;
+  engine_config.use_gpu = true;
+  engine_config.max_batch_size = FLAGS_batch_size;
+  if (!model.PaddleEngineInit(engine_config, gpu_ids)) {
     return -1;
   }
+
   // Mini-batch
   if (FLAGS_image_list == "") {
     std::cerr << "image_list should be defined" << std::endl;

+ 1 - 1
dygraph/deploy/cpp/demo/tensorrt/CMakeLists.txt → dygraph/deploy/cpp/demo/onnx_tensorrt/CMakeLists.txt

@@ -83,6 +83,6 @@ aux_source_directory(${PROJECT_ROOT_DIR}/model_deploy/ppseg/src DETECTOR_SRC)
 aux_source_directory(${PROJECT_ROOT_DIR}/model_deploy/ppclas/src DETECTOR_SRC)
 aux_source_directory(${PROJECT_ROOT_DIR}/model_deploy/paddlex/src DETECTOR_SRC)
 
-add_executable(model_infer ${PROJECT_ROOT_DIR}/demo/tensorrt/model_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC})
+add_executable(model_infer ${PROJECT_ROOT_DIR}/demo/onnx_tensorrt/model_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC})
 ADD_DEPENDENCIES(model_infer ext-yaml-cpp)
 target_link_libraries(model_infer ${DEPS})

+ 8 - 6
dygraph/deploy/cpp/demo/tensorrt/model_infer.cpp → dygraph/deploy/cpp/demo/onnx_tensorrt/model_infer.cpp

@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <glog/logging.h>
+#include <gflags/gflags.h>
 #include <omp.h>
 #include <memory>
 #include <string>
@@ -51,11 +51,13 @@ int main(int argc, char** argv) {
   std::cout << "start engine init " << std::endl;
 
   // inference engine init
-  model->TensorRTInit(FLAGS_model_file,
-                      FLAGS_cfg_file,
-                      FLAGS_gpu_id,
-                      FLAGS_save_engine,
-                      FLAGS_trt_cache_file);
+  PaddleDeploy::TensorRTEngineConfig engine_config;
+  engine_config.model_file_ = FLAGS_model_file;
+  engine_config.cfg_file_ = FLAGS_cfg_file;
+  engine_config.gpu_id_ = FLAGS_gpu_id;
+  engine_config.save_engine_ = FLAGS_save_engine;
+  engine_config.trt_cache_file_ = FLAGS_trt_cache_file;
+  model->TensorRTInit(engine_config);
 
   // prepare data
   std::vector<std::string> image_paths;

+ 1 - 1
dygraph/deploy/cpp/demo/triton/CMakeLists.txt → dygraph/deploy/cpp/demo/onnx_triton/CMakeLists.txt

@@ -70,6 +70,6 @@ aux_source_directory(${PROJECT_ROOT_DIR}/model_deploy/ppseg/src DETECTOR_SRC)
 aux_source_directory(${PROJECT_ROOT_DIR}/model_deploy/ppclas/src DETECTOR_SRC)
 aux_source_directory(${PROJECT_ROOT_DIR}/model_deploy/paddlex/src DETECTOR_SRC)
 
-add_executable(model_infer ${PROJECT_ROOT_DIR}/demo/triton/model_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC})
+add_executable(model_infer ${PROJECT_ROOT_DIR}/demo/onnx_triton/model_infer.cpp ${SRC} ${ENGINE_SRC} ${DETECTOR_SRC})
 ADD_DEPENDENCIES(model_infer ext-yaml-cpp)
 target_link_libraries(model_infer ${DEPS})

+ 6 - 2
dygraph/deploy/cpp/demo/triton/model_infer.cpp → dygraph/deploy/cpp/demo/onnx_triton/model_infer.cpp

@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <glog/logging.h>
+#include <gflags/gflags.h>
 #include <omp.h>
 #include <memory>
 #include <string>
@@ -50,7 +50,11 @@ int main(int argc, char** argv) {
   std::cout << "start engine init " << std::endl;
 
   // inference engine init
-  model->TritonEngineInit(FLAGS_url, FLAGS_model_name, FLAGS_model_version);
+  PaddleDeploy::TritonEngineConfig engine_config;
+  engine_config.url_ = FLAGS_url;
+  engine_config.model_name_ = FLAGS_model_name;
+  engine_config.model_version_ = FLAGS_model_version;
+  model->TritonEngineInit(engine_config);
 
   // prepare data
   std::vector<std::string> image_paths;

+ 78 - 0
dygraph/deploy/cpp/demo/tensorrt_infer.cpp

@@ -0,0 +1,78 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gflags/gflags.h>
+#include <string>
+#include <vector>
+
+#include "model_deploy/common/include/paddle_deploy.h"
+
+DEFINE_string(model_filename, "", "Path of det inference model");
+DEFINE_string(params_filename, "", "Path of det inference params");
+DEFINE_string(cfg_file, "", "Path of yaml file");
+DEFINE_string(model_type, "", "model type");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_int32(gpu_id, 0, "GPU card id");
+
+int main(int argc, char** argv) {
+  // Parsing command-line
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  // create model
+  std::shared_ptr<PaddleDeploy::Model> model =
+        PaddleDeploy::CreateModel(FLAGS_model_type);
+
+  // model init
+  model->Init(FLAGS_cfg_file);
+
+  // inference engine init
+  PaddleDeploy::PaddleEngineConfig engine_config;
+  engine_config.model_filename = FLAGS_model_filename;
+  engine_config.params_filename = FLAGS_params_filename;
+  engine_config.gpu_id = FLAGS_gpu_id;
+  engine_config.use_gpu = true;
+  engine_config.use_trt = true;
+  engine_config.precision = 0;
+  engine_config.min_subgraph_size = 10;
+  engine_config.max_workspace_size = 1 << 30;
+  if ("clas" == FLAGS_model_type) {
+    // Adjust shape according to the actual model
+    engine_config.min_input_shape["inputs"] = {1, 3, 224, 224};
+    engine_config.max_input_shape["inputs"] = {1, 3, 224, 224};
+    engine_config.optim_input_shape["inputs"] = {1, 3, 224, 224};
+  } else if ("det" == FLAGS_model_type) {
+    // Adjust shape according to the actual model
+    engine_config.min_input_shape["image"] = {1, 3, 608, 608};
+    engine_config.max_input_shape["image"] = {1, 3, 608, 608};
+    engine_config.optim_input_shape["image"] = {1, 3, 608, 608};
+  } else if ("seg" == FLAGS_model_type) {
+    engine_config.min_input_shape["x"] = {1, 3, 100, 100};
+    engine_config.max_input_shape["x"] = {1, 3, 2000, 2000};
+    engine_config.optim_input_shape["x"] = {1, 3, 1024, 1024};
+    // Additional nodes need to be added, pay attention to the output prompt
+  }
+  model->PaddleEngineInit(engine_config);
+
+  // prepare data
+  std::vector<cv::Mat> imgs;
+  imgs.push_back(std::move(cv::imread(FLAGS_image)));
+
+  // predict
+  std::vector<PaddleDeploy::Result> results;
+  model->Predict(imgs, &results, 1);
+
+  std::cout << results[0] << std::endl;
+
+  return 0;
+}
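A small variation on the demo above: per the `precision` comment in `PaddleEngineConfig` (0 = FP32, 1 = FP16, 2 = Int8), requesting FP16 TensorRT kernels only changes the precision value. A hedged sketch, assuming the target GPU and model actually support FP16:

```c++
#include <string>

#include "model_deploy/common/include/paddle_deploy.h"

// Build a Paddle-TRT engine config that requests FP16 kernels.
// Precision codes follow PaddleEngineConfig: 0 = FP32, 1 = FP16, 2 = Int8.
PaddleDeploy::PaddleEngineConfig MakeFp16TrtConfig(const std::string& model,
                                                   const std::string& params,
                                                   int gpu_id) {
  PaddleDeploy::PaddleEngineConfig engine_config;
  engine_config.model_filename = model;
  engine_config.params_filename = params;
  engine_config.use_gpu = true;
  engine_config.gpu_id = gpu_id;
  engine_config.use_trt = true;
  engine_config.precision = 1;                 // FP16
  engine_config.min_subgraph_size = 10;
  engine_config.max_workspace_size = 1 << 30;  // 1 GB TensorRT workspace
  return engine_config;
}
```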

+ 146 - 32
dygraph/deploy/cpp/docs/apis/model.md

@@ -8,6 +8,8 @@
 4. [预测结果字段](#004)
 5. [代码示例](#005)
 
+
+
 <span id="001"></span>
 
 ## 1. 创建模型对象
@@ -38,7 +40,7 @@ std::shared_ptr<PaddleDeploy::Model>  PaddleDeploy::ModelFactory::CreateObject(c
 
 ## 2. 模型初始化
 
-模型初始化包括2个步骤,第一步读取配置文件,初始化数据预处理和后处理相关操作;第二步初始化推理PaddleInference推理引擎;对应的接口分别为`PaddleDeploy::Model::Init()`和`PaddleDeploy::Model::PaddleEngineInit()`
+模型初始化包括2个步骤,第一步读取配置文件,初始化数据预处理和后处理相关操作;第二步初始化推理引擎;对应的接口分别为`PaddleDeploy::Model::Init()`和`PaddleDeploy::Model::XXXEngineInit()`
 
 ### 2.1 模型前后处理初始化
 
@@ -69,29 +71,14 @@ bool Model::Init(const std::string& cfg_file)
 ### 2.2 推理引擎初始化
 
 ```c++
-bool Model::PaddleEngineInit(const std::string& model_filename,
-                             const std::string& params_filename,
-                             bool use_gpu = false,
-                             int gpu_id = 0,
-                             bool use_mkl = true,
-                             int mkl_thread_num = 8)
+bool PaddleEngineInit(const PaddleEngineConfig& engine_config);
 ```
 
-> 初始化Paddle 推理引擎,  创建Model或者其子类对象后必须先调用它初始化,才能调推理接口。
+> 初始化Paddle推理引擎,创建Model或者其子类对象后必须先调用它初始化,才能调推理接口。
 
 **参数**
 
-> >**model_filename** 模型结构文件路径,如`model.pdmodel`或`__model__`
-> >
-> >**params_filename** 模型权重文件路径,如`model.pdiparams`或`__params__`
-> >
-> >**use_gpu** 是否使用GPU
-> >
-> >**gpu_id** GPU设备ID
-> >
-> >**use_mkl** 是否使用mkldnn(CPU上推理加速引擎)
-> >
-> >**mkl_thread_num** 使用mkldnn时的推理线程数
+> >**engine_config** Paddle推理引擎配置文件,具体参数说明请看[推理引擎配置参数](#004)
 
 **返回值**
 
@@ -100,13 +87,55 @@ bool Model::PaddleEngineInit(const std::string& model_filename,
 **代码示例**
 
 > ```c++
-> if (!modle->PaddleEngineInit("yolov3_mbv1/model/model.pdmodel",
->                              "yolov3_mbv1/model/model.pdiparams",
->                              true, 0)) {
+> if (!model->PaddleEngineInit(engine_config)) {
 >   std::cerr << "Fail to execute model->PaddleEngineInit()" << std::endl;
 > }
 > ```
 
+```c++
+bool TritonEngineInit(const TritonEngineConfig& engine_config);
+```
+
+> 初始化Triton推理引擎,用于ONNX模型推理。创建Model或者其子类对象后必须先调用它初始化,才能调推理接口。
+
+**参数**
+
+> >**engine_config** Triton推理引擎配置文件,具体参数说明请看[推理引擎配置参数](#004)
+
+**返回值**
+
+>  `true`或`false`,表示是否正确初始化
+
+**代码示例**
+
+> ```c++
+> if (!model->TritonEngineInit(engine_config)) {
+>   std::cerr << "Fail to execute model->TritonEngineInit()" << std::endl;
+> }
+> ```
+
+```c++
+bool TensorRTInit(const TensorRTEngineConfig& engine_config);
+```
+
+> 初始化TensorRT推理引擎,用于ONNX模型推理。创建Model或者其子类对象后必须先调用它初始化,才能调推理接口。
+
+**参数**
+
+> >**engine_config** TensorRT推理引擎配置文件,具体参数说明请看[推理引擎配置参数](#004)
+
+**返回值**
+
+>  `true`或`false`,表示是否正确初始化
+
+**代码示例**
+
+> ```c++
+> if (!model->TensorRTInit(engine_config)) {
+>   std::cerr << "Fail to execute model->TensorRTInit()" << std::endl;
+> }
+> ```
+
 
 
 <span id="003"></span>
@@ -119,6 +148,7 @@ bool Model::PaddleEngineInit(const std::string& model_filename,
 
 
 
+
 ## 3.1 预测接口
 
 ```c++
@@ -133,7 +163,7 @@ bool Model::PaddleEngineInit(const std::string& model_filename,
 
 > **imgs** 传入的vector,元素为cv::Mat,预测时将会对vector中所有Mat进行预处理,并作为一个batch输入给推理引擎进行预测;开发者在调用时,需考虑硬件配置,vector的size过大时,可能会由于显存或内存不足导致程序出错
 >
-> **results** 预测结果vector,其与输入的imgs长度相同,vector中每个元素说明参考[预测结果字段说明](#004)
+> **results** 预测结果vector,其与输入的imgs长度相同,vector中每个元素说明参考[预测结果字段说明](#005)
 >
 > **thread_num** 当输入vector的size大于1时,可通过thread_num来配置预处理和后处理的并行处理时的多线程数量
 
@@ -154,6 +184,7 @@ bool Model::PaddleEngineInit(const std::string& model_filename,
 
 
 
+
 ### 3.2 预处理接口
 
 ```c++
@@ -234,6 +265,7 @@ if (!model->Infer(inputs, &outputs)) {
 
 
 
+
 ### 3.4 后处理接口
 
 ```c++
@@ -287,9 +319,89 @@ bool Model::Postprocess(const std::vector<PaddleDeploy::DataBlob>& outputs,
 
 
 
+
 <span id="004"></span>
 
-## 4. 预测结果字段
+## 4. 推理引擎配置参数
+
+### 4.1 Paddle推理引擎配置
+
+```c++
+  std::string model_filename = "";  // 模型文件
+
+  std::string params_filename = ""; // 模型参数
+
+  bool use_mkl = true; // 是否开启mkl
+
+  int mkl_thread_num = 8; // mkl并行线程数
+
+  bool use_gpu = false; // 是否使用GPU进行推理
+
+  int gpu_id = 0; // 使用编号几的GPU
+
+  bool use_ir_optim = true; // 是否开启IR优化
+
+  bool use_trt = false; // 是否使用TensorRT
+
+  int max_batch_size = 1; // TensorRT最大batch大小
+
+  int min_subgraph_size = 1; // TensorRT 子图最小节点数
+
+  /*Set TensorRT data precision
+  0: FP32
+  1: FP16
+  2: Int8
+  */
+  int precision = 0;
+
+  int max_workspace_size = 1 << 10; // TensorRT申请的显存大小,1 << 10 = 1KB
+
+  std::map<std::string, std::vector<int>> min_input_shape; // 模型动态shape的最小输入形状, TensorRT才需要设置
+
+  std::map<std::string, std::vector<int>> max_input_shape; // 模型动态shape的最大输入形状, TensorRT才需要设置
+
+  std::map<std::string, std::vector<int>> optim_input_shape; // 模型动态shape的最常见输入形状, TensorRT才需要设置
+```
+
+
+
+### 4.2 Triton推理引擎配置
+
+```c++
+  std::string model_name_; // 向Triton Server请求的模型名称
+
+  std::string model_version_; // Triton Server中模型的版本号
+
+  uint64_t server_timeout_ = 0; // 服务端最大计算时间
+
+  uint64_t client_timeout_ = 0; // 客户端等待时间, 默认一直等
+
+  bool verbose_ = false; // 是否打开客户端日志
+
+  std::string url_; // Triton Server地址
+```
+
+
+
+### 4.3 TensorRT推理引擎配置
+
+```c++
+  std::string model_file_; // ONNX 模型地址
+
+  std::string cfg_file_; // 模型配置文件地址
+
+  int max_workspace_size_ = 1<<28; // GPU显存大小
+
+  int max_batch_size_ = 1; // 最大batch
+
+  int gpu_id_ = 0; // 使用编号几的GPU
+```
+
+
+
+<span id="005"></span>
+
+## 5. 预测结果字段
 
 在开发者常调用的接口中,包括主要的`Model::Predict`,以及`Model::Predict`内部调用的`Model::Preprocess`、`Model::Infer`和`Model::Postprocess`接口,涉及到的结构体主要为`PaddleDeploy::Result`(预测结果),`PaddleDeploy::DataBlob`和`PaddleDeploy::ShapeInfo`。其中`DataBlob`和`ShapeInfo`开发者较少用到,可直接阅读其代码实现。
 
@@ -311,7 +423,7 @@ struct Result {
 
 
 
-### 4.1 图像分类结果
+### 5.1 图像分类结果
 
 ```C++
 struct ClasResult {
@@ -323,7 +435,7 @@ struct ClasResult {
 
 
 
-### 4.2 目标检测结果
+### 5.2 目标检测结果
 
 ```C++
 struct DetResult {
@@ -352,7 +464,7 @@ struct Mask {
 
 
 
-### 4.3 语义分割结果
+### 5.3 语义分割结果
 
 ```c++
 struct SegResult {
@@ -371,9 +483,9 @@ struct Mask {
 
 
 
-<span id="005"></span>
+<span id="006"></span>
 
-## 5. 部署代码示例
+## 6. 部署代码示例
 
 以下示例代码以目标检测模型为例
 
@@ -384,9 +496,11 @@ int main() {
   std::shared_ptr<PaddleDeploy::Model> model =
                     PaddleDeploy::ModelFactory::CreateObject("det");
   model->Init("yolov3_mbv1/model/infer_cfg.yml");
-  model->PaddleEngineInit("yolov3_mbv1/model/model.pdmodel",
-                          "yolov3_mbv1/model/model.pdiparams",
-                          true, 0);
+  engine_config.model_filename = "yolov3_mbv1/model/model.pdmodel"
+  engine_config.params_filename = "yolov3_mbv1/model/model.pdiparams"
+  engine_config.use_gpu = true;
+  engine_config.gpu_id = 0;
 
   std::vector<cv::Mat> images;
   std::vector<PaddleDeploy::Result> results;
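Pulling the documented pieces together, a minimal end-to-end sketch of the new config-based API; it mirrors `demo/model_infer.cpp`, all paths are placeholders, and most error handling is omitted:

```c++
#include <iostream>
#include <memory>
#include <vector>

#include "model_deploy/common/include/paddle_deploy.h"

int main() {
  // 1. Create the model wrapper ("det" = PaddleDetection model).
  std::shared_ptr<PaddleDeploy::Model> model = PaddleDeploy::CreateModel("det");

  // 2. Init pre/post-processing from the exported config file.
  model->Init("yolov3_mbv1/model/infer_cfg.yml");

  // 3. Init the Paddle inference engine from a config struct.
  PaddleDeploy::PaddleEngineConfig engine_config;
  engine_config.model_filename = "yolov3_mbv1/model/model.pdmodel";
  engine_config.params_filename = "yolov3_mbv1/model/model.pdiparams";
  engine_config.use_gpu = true;
  engine_config.gpu_id = 0;
  if (!model->PaddleEngineInit(engine_config)) {
    std::cerr << "Fail to execute model->PaddleEngineInit()" << std::endl;
    return -1;
  }

  // 4. Predict on a single image and print the result.
  std::vector<cv::Mat> imgs;
  imgs.push_back(cv::imread("yolov3_mbv1/images/000000010583.jpg"));
  std::vector<PaddleDeploy::Result> results;
  model->Predict(imgs, &results, 1);
  std::cout << results[0] << std::endl;
  return 0;
}
```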

+ 24 - 25
dygraph/deploy/cpp/docs/compile/paddle/linux.md

@@ -12,27 +12,23 @@ Ubuntu 16.04/18.04
 ### Step1: 获取部署代码
 ```
 git clone https://github.com/PaddlePaddle/PaddleX.git
-cd PaddleX
-git checkout deploykit
-cd deploy/cpp
+cd PaddleX/dygraph/deploy/cpp
 ```
-**说明**:`C++`预测代码在`PaddleX/deploy/cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。所有的公共实现代码在`model_deploy`目录下,而示例demo代码为`demo/model_infer.cpp`
+**说明**:`C++`预测代码在`PaddleX/dygraph/deploy/cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。所有的公共实现代码在`model_deploy`目录下,所有示例代码都在`demo`目录下。
 
 ### Step 2. 下载PaddlePaddle C++ 预测库
-PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及不同的CUDA版本提供了已经编译好的预测库,目前PaddleX支持Paddle预测库2.0+,最新2.0.2版本下载链接如下所示:
+PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及不同的CUDA版本提供了已经编译好的预测库,目前PaddleX支持Paddle预测库2.0+,最新2.1版本下载链接如下所示:
 
-| 版本说明                               | 预测库(2.0.2)                                                | 编译器  |
-| -------------------------------------- | ------------------------------------------------------------ | ------- |
-| CPU版本                                | [paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.0.2-cpu-avx-mkl/paddle_inference.tgz) | gcc 8.2 |
-| GPU版本(CUDA9.0/CUDNN7)              | [paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.0.2-gpu-cuda9-cudnn7-avx-mkl/paddle_inference.tgz) | gcc 5.4 |
-| GPU版本(CUDA10.0/CUDNN7)             | [paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.0.2-gpu-cuda10-cudnn7-avx-mkl/paddle_inference.tgz) | gcc 5.4 |
-| GPU版本(CUDA10.1/CUDNN7.6/TensorRT6) | [ paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.0.2-gpu-cuda10.1-cudnn7-avx-mkl/paddle_inference.tgz) | gcc 8.2 |
-| GPU版本(CUDA10.2/CUDNN8/TensorRT7)   | [ paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.0.2-gpu-cuda10.2-cudnn8-avx-mkl/paddle_inference.tgz) | gcc 8.2 |
-| GPU版本(CUDA11/CUDNN8/TensorRT7)     | [ paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.0.2-gpu-cuda11-cudnn8-avx-mkl/paddle_inference.tgz) | gcc 8.2 |
+| 版本说明                               | 预测库(2.1)                                                                                                                   | 编译器  |
+| -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ------- |
+| CPU版本                                | [paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.1.0-cpu-avx-mkl/paddle_inference.tgz)                     | gcc 8.2 |
+| GPU版本(CUDA10.1/CUDNN7.6/TensorRT6) | [ paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.1.0-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddle_inference.tgz) | gcc 8.2 |
+| GPU版本(CUDA10.2/CUDNN8/TensorRT7)   | [ paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.1.0-gpu-cuda10.2-cudnn8-mkl-gcc8.2/paddle_inference.tgz) | gcc 8.2 |
+| GPU版本(CUDA11/CUDNN8/TensorRT7)     | [ paddle_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/2.1.0-gpu-cuda11.0-cudnn8-mkl-gcc8.2/paddle_inference.tgz) | gcc 8.2 |
 
-请根据实际情况选择下载,如若以上版本不满足您的需求,请至[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#linux)选择符合的版本。
+请根据实际情况选择下载,如若以上版本不满足您的需求,请至[C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/v2.1/user_guides/download_lib.html)选择符合的版本。
 
+将预测库解压后,其所在目录(例如解压至`PaddleX/dygraph/deploy/cpp/paddle_inference/`)下主要包含的内容有:
+将预测库解压后,其所在目录(例如解压至`PaddleX/dygraph/deploy/cpp/paddle_inferenc/`)下主要包含的内容有:
 
 ```
 ├── paddle/ # paddle核心库和头文件
@@ -43,16 +39,18 @@ PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及
 ```
 
 ### Step 3. 修改编译参数
-根据自己的系统环境,修改`PaddleX/deploy/cpp/script/build.sh`脚本中的参数,主要修改的参数为以下几个
-| 参数 | 说明 |
-| :--- | :--- |
-| WITH_GPU | ON或OFF,表示是否使用GPU,当下载的为CPU预测库时,设为OFF |
-| PADDLE_DIR | 预测库所在路径,默认为`PaddleX/deploy/cpp/paddle_inference`目录下 |
-| CUDA_LIB | cuda相关lib文件所在的目录路径 |
-| CUDNN_LIB | cudnn相关lib文件所在的目录路径 |
+根据自己的系统环境,修改`PaddleX/dygraph/deploy/cpp/script/build.sh`脚本中的参数,主要修改的参数为以下几个
+| 参数          | 说明                                                                                 |
+| :------------ | :----------------------------------------------------------------------------------- |
+| WITH_GPU      | ON或OFF,表示是否使用GPU,当下载的为CPU预测库时,设为OFF                             |
+| PADDLE_DIR    | 预测库所在路径,默认为`PaddleX/deploy/cpp/paddle_inference`目录下                    |
+| CUDA_LIB      | cuda相关lib文件所在的目录路径                                                        |
+| CUDNN_LIB     | cudnn相关lib文件所在的目录路径                                                       |
+| WITH_TENSORRT | ON或OFF,表示是否使用开启TensorRT                                                    |
+| TENSORRT_DIR  | TensorRT 的路径,如果开启TensorRT开关WITH_TENSORRT,需修改为您实际安装的TensorRT路径 |
 
 ### Step 4. 编译
-修改完build.sh后执行编译, **[注意]**: 以下命令在`PaddleX/deploy/cpp`目录下进行执行
+修改完build.sh后执行编译, **[注意]**: 以下命令在`PaddleX/dygraph/deploy/cpp`目录下进行执行
 
 ```
 sh script/build.sh
@@ -62,7 +60,7 @@ sh script/build.sh
 > 编译过程,会调用script/bootstrap.sh联网下载opencv,以及yaml依赖包,如无法联网,用户按照下操作手动下载
 >
 > 1. 根据系统版本,点击右侧链接下载不同版本的opencv依赖 [Ubuntu 16.04](https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2)/[Ubuntu 18.04](https://bj.bcebos.com/paddlex/deploy/opencv3.4.6gcc4.8ffmpeg_ubuntu_18.04.tar.gz2)
-> 2. 解压下载的opencv依赖(解压后目录名为opencv3.4.6gcc4.8ffmpeg),创建目录`PaddleX/deploy/cpp/deps`,将解压后的目录拷贝至该创建的目录下
+> 2. 解压下载的opencv依赖(解压后目录名为opencv3.4.6gcc4.8ffmpeg),创建目录`PaddleX/dygraph/deploy/cpp/deps`,将解压后的目录拷贝至该创建的目录下
 > 3. [点击下载yaml依赖包](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip),无需解压
 > 4. 修改`PaddleX/deploy/cpp/cmake/yaml.cmake`文件,将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip`中网址替换为第3步中下载的路径,如改为`URL /Users/Download/yaml-cpp.zip`
 > 5. 重新执行`sh script/build.sh`即可编译
@@ -71,10 +69,11 @@ sh script/build.sh
 
 ### Step 5. 编译结果
 
-编译后会在`PaddleX/deploy/cpp/build/demo`目录下生成`model_infer`和`multi_gpu_model_infer`两个可执行二进制文件示例,分别用于在单卡/多卡上加载模型进行预测,示例使用参考如下文档
+编译后会在`PaddleX/dygraph/deploy/cpp/build/demo`目录下生成`model_infer`、`multi_gpu_model_infer`和`batch_infer`等几个可执行二进制文件示例,分别用于在单卡/多卡/多batch上加载模型进行预测,示例使用参考如下文档
 
 - [单卡加载模型预测示例](../../demo/model_infer.md)
 - [多卡加载模型预测示例](../../demo/multi_gpu_model_infer.md)
+- [PaddleInference集成TensorRT加载模型预测示例](../../demo/tensorrt_infer.md)
 
 
 

+ 13 - 15
dygraph/deploy/cpp/docs/compile/paddle/windows.md

@@ -19,26 +19,23 @@ d:
 mkdir projects
 cd projects
 git clone https://github.com/PaddlePaddle/PaddleX.git
-git checkout deploykit
 ```
 
-**说明**:其中`C++`预测代码在`PaddleX\deploy\cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。所有的公共实现代码在`model_deploy`目录下,而示例demo代码为`demo/model_infer.cpp`
+**说明**:其中`C++`预测代码在`PaddleX\dygraph\deploy\cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。所有的公共实现代码在`model_deploy`目录下,所有示例代码都在`demo`目录下
 
 
 ### Step2: 下载PaddlePaddle C++ 预测库
 
-PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及不同的CUDA版本提供了已经编译好的预测库,目前PaddleX支持Paddle预测库2.0+,最新2.0.2版本下载链接如下所示:
+PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及不同的CUDA版本提供了已经编译好的预测库,目前PaddleX支持Paddle预测库2.0+,最新2.1版本下载链接如下所示:
 
-| 版本说明                     | 预测库(2.0.2)                                                                                                   | 编译器                | 构建工具      | cuDNN | CUDA |
-| ---------------------------- | --------------------------------------------------------------------------------------------------------------- | --------------------- | ------------- | ----- | ---- |
-| cpu_avx_mkl                  | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.0.2/win-infer/mkl/cpu/paddle_inference.zip)         | Visual Studio 15 2017 | CMake v3.17.0 | -     | -    |
-| cuda10.0_cudnn7_avx_mkl      | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.0.2/win-infer/mkl/post100/paddle_inference.zip)     | MSVC 2015 update 3    | CMake v3.17.0 | 7.4.1 | 10.0 |
-| cuda10.0_cudnn7_avx_mkl_trt6 | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.0.2/win-infer/trt_mkl/post100/paddle_inference.zip) | MSVC 2015 update 3    | CMake v3.17.0 | 7.4.1 | 10.0 |
-| cuda10.1_cudnn7_avx_mkl_trt6 | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.0.2/win-infer/trt_mkl/post101/paddle_inference.zip) | MSVC 2015 update 3    | CMake v3.17.0 | 7.6   | 10.1 |
-| cuda10.2_cudnn7_avx_mkl_trt7 | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.0.2/win-infer/trt_mkl/post102/paddle_inference.zip) | MSVC 2015 update 3    | CMake v3.17.0 | 7.6   | 10.2 |
-| cuda11.0_cudnn8_avx_mkl_trt7 | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.0.2/win-infer/trt_mkl/post11/paddle_inference.zip)  | MSVC 2015 update 3    | CMake v3.17.0 | 8.0   | 11.0 |
+| 版本说明                     | 预测库(2.1)                                                                                                    | 编译器                | 构建工具      | cuDNN | CUDA |
+| ---------------------------- | -------------------------------------------------------------------------------------------------------------- | --------------------- | ------------- | ----- | ---- |
+| cpu_avx_mkl                  | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.1.0win/win-infer/mkl/cpu/paddle_inference.zip)     | Visual Studio 15 2017 | CMake v3.17.0 | -     | -    |
+| cuda10.1_cudnn7_avx_mkl_trt6 | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.1.0win/win-infer/mkl/post101/paddle_inference.zip) | MSVC 2015 update 3    | CMake v3.17.0 | 7.6   | 10.1 |
+| cuda10.2_cudnn7_avx_mkl_trt7 | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.1.0win/win-infer/mkl/post102/paddle_inference.zip) | MSVC 2015 update 3    | CMake v3.17.0 | 7.6   | 10.2 |
+| cuda11.0_cudnn8_avx_mkl_trt7 | [paddle_inference.zip](https://paddle-wheel.bj.bcebos.com/2.1.0win/win-infer/mkl/post110/paddle_inference.zip) | MSVC 2015 update 3    | CMake v3.17.0 | 8.0   | 11.0 |
 
-请根据实际情况选择下载,如若以上版本不满足您的需求,请至[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/windows_cpp_inference.html)选择符合的版本。
+请根据实际情况选择下载,如若以上版本不满足您的需求,请至[C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/v2.1/user_guides/download_lib.html)选择符合的版本。
 
 将预测库解压后,其所在目录(例如`D:\projects\paddle_inference_install_dir\`)下主要包含的内容有:
 
@@ -68,7 +65,7 @@ PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及
 
 ![](../../images/vs2019_step2.png)
 
-选择C++预测代码所在路径(例如`D:\projects\PaddleX\deploy\cpp`),并打开`CMakeList.txt`:
+选择C++预测代码所在路径(例如`D:\projects\PaddleX\dygraph\deploy\cpp`),并打开`CMakeList.txt`:
 ![](../../images/vs2019_step3.png)
 
 3. 打开项目时,可能会自动构建。由于没有进行下面的依赖路径设置会报错,这个报错可以先忽略。
@@ -90,7 +87,7 @@ PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及
 
 - 如果使用`CPU`版预测库,请把`WITH_GPU`的`值`去掉勾
 - 如果使用的是`openblas`版本,请把`WITH_MKL`的`值`去掉勾
-- 如果无法联网,请手动点击下载 [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip),无需解压,并修改`PaddleX\deploy\cpp\cmake\yaml.cmake`中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址替换为第3步中下载的路径,如改为`URL D:\projects\yaml-cpp.zip`。
+- 如果无法联网,请手动点击下载 [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip),无需解压,并修改`PaddleX\dygraph\deploy\cpp\cmake\yaml.cmake`中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址替换为第3步中下载的路径,如改为`URL D:\projects\yaml-cpp.zip`。
 
 5. 保存并生成CMake缓存
 
@@ -103,10 +100,11 @@ PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及
 
 ### Step5: 编译结果
 
-编译后会在`PaddleX/deploy/cpp/build/demo`目录下生成`model_infer`和`multi_gpu_model_infer`两个可执行二进制文件示例,分别用于在单卡/多卡上加载模型进行预测,示例使用参考如下文档
+编译后会在`PaddleX/dygraph/deploy/cpp/build/demo`目录下生成`model_infer`和`multi_gpu_model_infer`两个可执行二进制文件示例,分别用于在单卡/多卡上加载模型进行预测,示例使用参考如下文档
 
 - [单卡加载模型预测示例](../../demo/model_infer.md)
 - [多卡加载模型预测示例](../../demo/multi_gpu_model_infer.md)
+- [PaddleInference集成TensorRT加载模型预测示例](../../demo/tensorrt_infer.md)
 
 
 

+ 11 - 15
dygraph/deploy/cpp/docs/demo/model_infer.md

@@ -12,7 +12,7 @@
 - [PaddleDetection导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/deploy/EXPORT_MODEL.md)
 - [PaddleSeg导出模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v2.0/docs/model_export.md)
 - [PaddleClas导出模型](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/zh_CN/tutorials/getting_started.md#4-%E4%BD%BF%E7%94%A8inference%E6%A8%A1%E5%9E%8B%E8%BF%9B%E8%A1%8C%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86)
-- [PaddleX导出模型](https://paddlex.readthedocs.io/zh_CN/develop/deploy/server/python.html#python)
+- [PaddleX导出模型](https://paddlex.readthedocs.io/zh_CN/develop/deploy/export_model.html)
 
 
 用户也可直接下载本教程中从PaddleDetection中导出的YOLOv3模型进行测试,[点击下载](https://bj.bcebos.com/paddlex/deploy2/models/yolov3_mbv1.tar.gz)。
@@ -20,7 +20,8 @@
 ## 步骤三、使用编译好的可执行文件预测
 以步骤二中下载的YOLOv3模型为例,执行如下命令即可进行模型加载和预测
 
-```
+```sh
+# 使用GPU 加参数 --use_gpu=1
 build/demo/model_infer --model_filename=yolov3_mbv1/model/model.pdmodel \
                        --params_filename=yolov3_mbv1/model/model.pdiparams \
                        --cfg_file=yolov3_mbv1/model/infer_cfg.yml \
@@ -40,20 +41,15 @@ Box(45	bowl	0.0191819	3.91156	1.276	225.888	214.273)
 ```
 ### 参数说明
 
-| 参数            | 说明                                                         |
-| --------------- | ------------------------------------------------------------ |
-| model_filename  | **[必填]** 模型结构文件路径,如`yolov3_darknet/model.pdmodel` |
-| params_filename | **[必填]** 模型权重文件路径,如`yolov3_darknet/model.pdiparams` |
-| cfg_file        | **[必填]** 模型配置文件路径,如`yolov3_darknet/infer_cfg.yml` |
+| 参数            | 说明                                                                                                         |
+| --------------- | ------------------------------------------------------------------------------------------------------------ |
+| model_filename  | **[必填]** 模型结构文件路径,如`yolov3_darknet/model.pdmodel`                                                |
+| params_filename | **[必填]** 模型权重文件路径,如`yolov3_darknet/model.pdiparams`                                              |
+| cfg_file        | **[必填]** 模型配置文件路径,如`yolov3_darknet/infer_cfg.yml`                                                |
 | model_type      | **[必填]** 模型来源,det/seg/clas/paddlex,分别表示模型来源于PaddleDetection、PaddleSeg、PaddleClas和PaddleX |
-| image           | 待预测的图片文件路径                                         |
-| image_list      | 待预测的图片路径列表文件路径,如步骤三中的`yolov3_darknet/file_list.txt` |
-| use_gpu         | 是否使用GPU,0或者1,默认为0                                 |
-| gpu_id          | 使用GPU预测时的GUI设备ID,默认为0                            |
-| use_mkl         | 是否使用mkldnn(CPU上加速),0或者1,默认为1                 |
-| mkl_thread_num  | 在批预测时,预处理的并行线程数设置,默认为1                  |
-| batch_size      | 当指定image_list时,设定每次预测时的batch大小,默认为1       |
-| thread_num      | 当预测batch大于1时,设定图像预处理时的并行线程数,默认为1    |
+| image           | 待预测的图片文件路径                                                                                         |
+| use_gpu         | 是否使用GPU,0或者1,默认为0                                                                                 |
+| gpu_id          | 使用GPU预测时的GPU设备ID,默认为0                                                                            |
 
 
 

+ 2 - 2
dygraph/deploy/cpp/docs/demo/multi_gpu_model_infer.md

@@ -1,6 +1,6 @@
 # 多GPU卡模型加载预测示例
 
-本文档说明`PaddleX/deploy/cpp/demo/multi_gpu_model_infer.cpp`编译后的使用方法,仅供用户参考进行使用,开发者可基于此demo示例进行二次开发,满足集成的需求。
+本文档说明`PaddleX/dygraph/deploy/cpp/demo/multi_gpu_model_infer.cpp`编译后的使用方法,仅供用户参考进行使用,开发者可基于此demo示例进行二次开发,满足集成的需求。
 
 在多卡上实现机制如下
 
@@ -22,7 +22,7 @@
 - [PaddleDetection导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/deploy/EXPORT_MODEL.md)
 - [PaddleSeg导出模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v2.0/docs/model_export.md)
 - [PaddleClas导出模型](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/zh_CN/tutorials/getting_started.md#4-%E4%BD%BF%E7%94%A8inference%E6%A8%A1%E5%9E%8B%E8%BF%9B%E8%A1%8C%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86)
-- [PaddleX导出模型](https://paddlex.readthedocs.io/zh_CN/develop/deploy/server/python.html#python)
+- [PaddleX导出模型](https://paddlex.readthedocs.io/zh_CN/develop/deploy/export_model.html)
 
 
 

+ 150 - 0
dygraph/deploy/cpp/docs/demo/tensorrt_infer.md

@@ -0,0 +1,150 @@
+# TensorRT加载模型预测
+
+本文档基于`PaddleX/dygraph/deploy/cpp/demo/tensorrt_infer.cpp`示例,讲述如何用PaddleInference引擎结合TensorRT部署模型。开发者可基于此demo示例进行二次开发,满足集成的需求。
+
+## 步骤一、编译
+参考编译文档
+- [Linux系统上编译指南](../compile/paddle/linux.md)
+- [Windows系统上编译指南](../compile/paddle/windows.md)
+
+**注意**:  编译时需要把TensorRT开关打开, 并填写TensorRT 路径。TensorRT下载链接:[NVIDIA 官网](https://developer.nvidia.com/nvidia-tensorrt-download)
+
+## 步骤二、准备PaddlePaddle部署模型
+开发者可从以下套件获取部署模型,需要注意,部署时需要准备的是导出来的部署模型,一般包含`model.pdmodel`、`model.pdiparams`和`deploy.yml`三个文件,分别表示模型结构、模型权重和各套件自行定义的配置信息。
+- [PaddleDetection导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/deploy/EXPORT_MODEL.md)
+- [PaddleSeg导出模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v2.0/docs/model_export.md)
+- [PaddleClas导出模型](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/zh_CN/tutorials/getting_started.md#4-%E4%BD%BF%E7%94%A8inference%E6%A8%A1%E5%9E%8B%E8%BF%9B%E8%A1%8C%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86)
+- [PaddleX导出模型](https://paddlex.readthedocs.io/zh_CN/develop/deploy/export_model.html)
+
+用户也可直接下载本教程中从PaddleDetection中导出的YOLOv3模型进行测试,[点击下载](https://bj.bcebos.com/paddlex/deploy2/models/yolov3_mbv1.tar.gz)。
+
+### 模型动态Shape处理
+
+目前有两种方法使用TensorRT部署动态Shape的模型:
+
+1. 在导出模型时,固定shape的大小。
+
+   例如[PaddleDetection模型部署](../models/paddledetection.md)文档中有如何固定shape的导出方法
+
+2. 配置TensorRT的动态shape参数。 PaddleSeg等套件导出模型时没提供固定shape的方法, 需要配置TensorRT参数才能加载模型。
+
+   TensorRT配置参数说明, 动态输入一定要配置`min_input_shape`、`max_input_shape`、`optim_input_shape` 三个参数
+
+   | 参数               | 说明                                                                                                                       |
+   | ------------------ | -------------------------------------------------------------------------------------------------------------------------- |
+   | precision          | 数据类型, 默认值为0。 0表示FP32 、1表示FP16、2表示INT8                                                                    |
+   | max_workspace_size | TensorRT申请的显存空间, 默认值为 1 << 10(1KB),  如果大点模型需要提到1 << 30                                                |
+   | min_input_shape    | 动态输入的最小shape。 map类型, key为输入名称, value为shape                                                               |
+   | max_input_shape    | 动态输入的最大shape。 map类型, key为输入名称, value为shape                                                               |
+   | optim_input_shape  | 动态输入的最常出现的shape。 map类型, key为输入名称, value为shape                                                         |
+   | min_subgraph_size  | 最小的子图大小,用于过滤节点过小的子图。<br/>设置为10,则小于10个node的子图将不用TensorRT优化,用PaddleInference引擎运行。 |
+
+   可参考`demo/tensorrt_infer.cpp`例子:
+
+```c++
+  PaddleDeploy::PaddleEngineConfig engine_config;
+  engine_config.use_gpu = true;
+  engine_config.use_trt = true;
+  engine_config.precision = 0;
+  engine_config.min_subgraph_size = 10;
+  engine_config.max_workspace_size = 1 << 30;
+ // 分类clas和检测detection模型, 一般只需根据实际模型情况修改输入的shape即可
+ // seg分割模型一般需要填写额外的其他节点信息, 注意看提示
+   if ("clas" == FLAGS_model_type) {
+      engine_config.min_input_shape["inputs"] = {1, 3, 224, 224};
+      engine_config.max_input_shape["inputs"] = {1, 3, 224, 224};
+      engine_config.optim_input_shape["inputs"] = {1, 3, 224, 224};
+    } else if ("det" == FLAGS_model_type) {
+      // Adjust shape according to the actual model
+      engine_config.min_input_shape["image"] = {1, 3, 608, 608};
+      engine_config.max_input_shape["image"] = {1, 3, 608, 608};
+      engine_config.optim_input_shape["image"] = {1, 3, 608, 608};
+    } else if ("seg" == FLAGS_model_type) {
+      engine_config.min_input_shape["x"] = {1, 3, 100, 100};
+      engine_config.max_input_shape["x"] = {1, 3, 2000, 2000};
+      engine_config.optim_input_shape["x"] = {1, 3, 1024, 1024};
+      // Additional nodes need to be added, pay attention to the output prompt
+    }
+```
+
+如果运行时出现以下报错,则需要根据报错将提示的输入节点填入配置中
+
+![tensorrt](../images/tensorrt.png)
+
+```c++
+    // 上面报错,配置文件填写示例
+    engine_config.min_input_shape["x"] = {1,3, 100, 100};
+    engine_config.max_input_shape["x"] = {1,3, 2000, 2000};
+    engine_config.optim_input_shape["x"] = {1,3, 129, 129};
+
+    engine_config.min_input_shape["batch_norm_37.tmp_2"] = {1,64, 7, 7};
+    engine_config.max_input_shape["batch_norm_37.tmp_2"] = {1,64, 125, 125};
+    engine_config.optim_input_shape["batch_norm_37.tmp_2"] = {1,64, 12, 12};
+
+    engine_config.min_input_shape["bilinear_interp_v2_1.tmp_0"] = {1,16, 25, 25};
+    engine_config.max_input_shape["bilinear_interp_v2_1.tmp_0"] = {1,16, 500, 500};
+    engine_config.optim_input_shape["bilinear_interp_v2_1.tmp_0"] = {1,16, 48, 48};
+
+    engine_config.min_input_shape["bilinear_interp_v2_2.tmp_0"] = {1,16, 25, 25};
+    engine_config.max_input_shape["bilinear_interp_v2_2.tmp_0"] = {1,16, 500, 500};
+    engine_config.optim_input_shape["bilinear_interp_v2_2.tmp_0"] = {1,16, 48, 48};
+
+    engine_config.min_input_shape["bilinear_interp_v2_3.tmp_0"] = {1,32, 13, 13};
+    engine_config.max_input_shape["bilinear_interp_v2_3.tmp_0"] = {1,32, 250, 250};
+    engine_config.optim_input_shape["bilinear_interp_v2_3.tmp_0"] = {1,32, 24, 24};
+
+    engine_config.min_input_shape["relu_21.tmp_0"] = {1,16, 25, 25};
+    engine_config.max_input_shape["relu_21.tmp_0"] = {1,16, 500, 500};
+    engine_config.optim_input_shape["relu_21.tmp_0"] = {1,16, 48, 48};
+
+    engine_config.min_input_shape["relu_29.tmp_0"] = {1,64, 7, 7};
+    engine_config.max_input_shape["relu_29.tmp_0"] = {1,64, 125, 125};
+    engine_config.optim_input_shape["relu_29.tmp_0"] = {1,64, 12, 12};
+
+    engine_config.min_input_shape["relu_32.tmp_0"] = {1,16, 13, 13};
+    engine_config.max_input_shape["relu_32.tmp_0"] = {1,16, 250, 250};
+    engine_config.optim_input_shape["relu_32.tmp_0"] = {1,16, 24, 24};
+
+    engine_config.min_input_shape["tmp_15"] = {1,32, 13, 13};
+    engine_config.max_input_shape["tmp_15"] = {1,32, 250, 250};
+    engine_config.optim_input_shape["tmp_15"] = {1,32, 24, 24};
+```
+
+## 步骤三、使用编译好的可执行文件预测
+
+以步骤二中下载的YOLOv3模型为例,执行如下命令即可进行模型加载和预测
+
+```sh
+# 使用GPU 加参数 --use_gpu=1
+build/demo/tensorrt_infer --model_filename=yolov3_mbv1/model/model.pdmodel \
+                          --params_filename=yolov3_mbv1/model/model.pdiparams \
+                          --cfg_file=yolov3_mbv1/model/infer_cfg.yml \
+                          --image=yolov3_mbv1/images/000000010583.jpg \
+                          --model_type=det
+```
+输出结果如下(分别为类别id、标签、置信度、xmin、ymin、w, h)
+```
+Box(0	person	0.0386442	2.11425	53.4415	36.2138	197.833)
+Box(39	bottle	0.0134608	2.11425	53.4415	36.2138	197.833)
+Box(41	cup	0.0255145	2.11425	53.4415	36.2138	197.833)
+Box(43	knife	0.0824398	509.703	189.959	100.65	93.9368)
+Box(43	knife	0.0211949	448.076	167.649	162.924	143.557)
+Box(44	spoon	0.0234474	509.703	189.959	100.65	93.9368)
+Box(45	bowl	0.0461333	0	0	223.386	83.5562)
+Box(45	bowl	0.0191819	3.91156	1.276	225.888	214.273)
+```
+### 参数说明
+
+| 参数            | 说明                                                                                                         |
+| --------------- | ------------------------------------------------------------------------------------------------------------ |
+| model_filename  | **[必填]** 模型结构文件路径,如`yolov3_darknet/model.pdmodel`                                                |
+| params_filename | **[必填]** 模型权重文件路径,如`yolov3_darknet/model.pdiparams`                                              |
+| cfg_file        | **[必填]** 模型配置文件路径,如`yolov3_darknet/infer_cfg.yml`                                                |
+| model_type      | **[必填]** 模型来源,det/seg/clas/paddlex,分别表示模型来源于PaddleDetection、PaddleSeg、PaddleClas和PaddleX |
+| image           | 待预测的图片文件路径                                                                                         |
+
+
+
+## 相关文档
+
+- [部署接口和数据结构文档](../apis/model.md)

BIN
dygraph/deploy/cpp/docs/images/tensorrt.png


+ 3 - 2
dygraph/deploy/cpp/docs/models/paddleclas.md

@@ -56,8 +56,8 @@ ResNet50
 
 参考编译文档
 
-- [Linux系统上编译指南](../compile/linux.md)
-- [Windows系统上编译指南](../compile/windows.md)
+- [Linux系统上编译指南](../compile/paddle/linux.md)
+- [Windows系统上编译指南](../compile/paddle/windows.md)
 
 
 
@@ -83,3 +83,4 @@ Classify(809    sunscreen   0.939211)
 
 - [单卡加载模型预测示例](../demo/model_infer.md)
 - [多卡加载模型预测示例](../demo/multi_gpu_model_infer.md)
+- [PaddleInference集成TensorRT加载模型预测示例](../../demo/tensorrt_infer.md)

+ 19 - 17
dygraph/deploy/cpp/docs/models/paddledetection.md

@@ -1,6 +1,6 @@
 # PaddleDetection模型部署
 
-当前支持PaddleDetection release/0.5和release/2.0分支导出的模型进行部署(仅支持FasterRCNN/MaskRCNN/PPYOLO/PPYOLOv2/YOLOv3)。PaddleDetection相关详细文档可以查看[官网文档](https://github.com/PaddlePaddle/PaddleDetection/tree/release/0.5)。
+当前支持PaddleDetection release/0.5和release/2.1分支导出的模型进行部署(仅支持FasterRCNN/MaskRCNN/PPYOLO/PPYOLOv2/YOLOv3)。PaddleDetection相关详细文档可以查看[官网文档](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1)。
 
 下面主要以YoloV3为例,讲解从模型导出到部署的整个流程。
 
@@ -11,29 +11,29 @@
 ```sh
 git clone https://github.com/PaddlePaddle/PaddleDetection.git
 cd PaddleDetection
-git checkout realease/2.0
 ```
 
 ### 2. 导出基于COCO数据的预训练模型
 
 在进入`PaddleDetection`目录后,执行如下命令导出预训练模型
 
-```sh
-python tools/export_models.py -c configs/yolov3_darknet.yml \
-                              -o weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar \
-                              --output_dir=inference_model
+```python
+# 导出YOLOv3模型
+python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml \
+                             -o weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams \
+                             --output_dir=./inference_model
 ```
 
 **如果你需要使用TensorRT进行部署预测**,则需要在导出模型时固定输入shape,命令如下
 
-```sh
-python tools/export_models.py -c configs/yolov3_darknet.yml \
-                              -o weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar \
+```python
+python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml \
+                              -o weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams \
                               TestReader.inputs_def.image_shape=[3,640,640] \
-                              --output_dir=inference_model
+                              --output_dir=./inference_model
 ```
 
-导出的部署模型会保存在`inference_model/yolov3_darknet`目录,其结构如下
+导出的部署模型会保存在`inference_model/yolov3_darknet53_270e_coco`目录,其结构如下
 
 ```
 yolov3_darknet
@@ -49,8 +49,8 @@ yolov3_darknet
 
 参考编译文档
 
-- [Linux系统上编译指南](../compile/linux.md)
-- [Windows系统上编译指南](../compile/windows.md)
+- [Linux系统上编译指南](../compile/paddle/linux.md)
+- [Windows系统上编译指南](../compile/paddle/windows.md)
 
 
 
@@ -58,10 +58,11 @@ yolov3_darknet
 
 编译后即可获取可执行的二进制demo程序`model_infer`和`multi_gpu_model_infer`,分别用于在单卡/多卡上加载模型进行预测,对于分类模型,调用如下命令即可进行预测
 
-```
-./build/demo/model_infer --model_filename=inference_model/yolov3_darknet/model.pdmodel \
-                         --params_filename=inference_model/yolov3_darknet/model.pdiparams \
-                         --cfg_file=inference_model/yolov3_darknet/infer_cfg.yml \
+```sh
+# 使用gpu加 --use_gpu=1 参数
+./build/demo/model_infer --model_filename=inference_model/yolov3_darknet53_270e_coco/model.pdmodel \
+                         --params_filename=inference_model/yolov3_darknet53_270e_coco/model.pdiparams \
+                         --cfg_file=inference_model/yolov3_darknet53_270e_coco/infer_cfg.yml \
                          --image=test.jpg \
                          --model_type=det
 ```
@@ -79,3 +80,4 @@ Box(39  bottle  0.356306    551.603 288.384 34.9819 112.599)
 
 - [单卡加载模型预测示例](../demo/model_infer.md)
 - [多卡加载模型预测示例](../demo/multi_gpu_model_infer.md)
+- [PaddleInference集成TensorRT加载模型预测示例](../../demo/tensorrt_infer.md)
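If the model is exported with the fixed `[3,640,640]` shape above and then run through the TensorRT demo, the dynamic-shape hints collapse to that single shape. A sketch, assuming the input tensor is named `image` as in `demo/tensorrt_infer.cpp`:

```c++
#include "model_deploy/common/include/paddle_deploy.h"

// TensorRT shape hints for a detector exported with
// TestReader.inputs_def.image_shape=[3,640,640]; the tensor name "image"
// follows demo/tensorrt_infer.cpp and may differ for other models.
void SetFixedDetShape(PaddleDeploy::PaddleEngineConfig* engine_config) {
  engine_config->min_input_shape["image"] = {1, 3, 640, 640};
  engine_config->max_input_shape["image"] = {1, 3, 640, 640};
  engine_config->optim_input_shape["image"] = {1, 3, 640, 640};
}
```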

+ 6 - 8
dygraph/deploy/cpp/docs/models/paddleseg.md

@@ -1,6 +1,6 @@
 # PaddleSeg模型部署
 
-Export and deployment are currently supported for models trained with the PaddleSeg release/2.0 branch. This document uses the [Deeplabv3P](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v2.0/configs/deeplabv3p) model as an example to describe the whole flow of exporting a model from release-2.0 and deploying it with the C++ code. For detailed PaddleSeg documentation, see the [official docs](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v2.0/README_CN.md)
+Export and deployment are currently supported for models trained with the PaddleSeg release/2.1 branch. This document uses the [Deeplabv3P](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v2.0/configs/deeplabv3p) model as an example to describe the whole flow of exporting a model from release-2.1 and deploying it with the C++ code. For detailed PaddleSeg documentation, see the [official docs](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/README_CN.md)
 
 ## Step 1: Export the deployment model
 
@@ -9,7 +9,6 @@
 ```sh
 git clone https://github.com/PaddlePaddle/PaddleSeg.git
 cd PaddleSeg
-git checkout realease/v2.0
 ```
 
 ### 2. Export a pretrained model based on the Cityscapes dataset
@@ -35,16 +34,12 @@ output
   └── model.pdmodel          # static graph model file
 ```
 
-
-
 ## Step 2: Compilation
 
 Refer to the compilation guides
 
-- [Compilation guide for Linux](../compile/linux.md)
-- [Compilation guide for Windows](../compile/windows.md)
-
-
+- [Compilation guide for Linux](../compile/paddle/linux.md)
+- [Compilation guide for Windows](../compile/paddle/windows.md)
 
 ## Step 3: Model inference
 
@@ -64,7 +59,10 @@ output
 ScoreMask(mean: 12.4814 std:    10.4955)    LabelMask(mean: 1.98847 std:    10.3141)
 ```
 
+**Note:** Since release/2.1, models exported by PaddleSeg only produce a meaningful label_map by default; all score_map values are filled with 1.0
+
 For detailed usage of the demo programs, refer to the following documents
 
 - [Single-GPU model inference demo](../demo/model_infer.md)
 - [Multi-GPU model inference demo](../demo/multi_gpu_model_infer.md)
+- [TensorRT inference demo with Paddle Inference](../demo/tensorrt_infer.md)
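
To illustrate the note above, a hedged sketch of reading the segmentation result of a release/2.1+ export; the `label_map`/`score_map` member names are assumed from `output_struct.h`, which this change does not modify.

```cpp
#include <iostream>
#include <memory>
#include <vector>

#include <opencv2/opencv.hpp>

#include "model_deploy/common/include/paddle_deploy.h"

// Sketch: consume the result of a model exported from PaddleSeg release/2.1+.
void PrintLabelMask(std::shared_ptr<PaddleDeploy::Model> model,
                    const std::vector<cv::Mat>& imgs) {
  std::vector<PaddleDeploy::Result> results;
  model->Predict(imgs, &results);  // Predict signature assumed from the demos

  // label_map/score_map member names are assumed from output_struct.h.
  const PaddleDeploy::SegResult* seg = results[0].seg_result;
  // For release/2.1+ exports only label_map carries real values
  // (one class id per pixel, shape = {h, w}); every score_map entry is 1.0.
  cv::Mat label_mat(seg->label_map.shape[0], seg->label_map.shape[1], CV_8UC1,
                    const_cast<uint8_t*>(seg->label_map.data.data()));
  std::cout << "label mask size: " << label_mat.size() << std::endl;
}
```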

+ 11 - 18
dygraph/deploy/cpp/model_deploy/common/include/base_model.h

@@ -64,21 +64,11 @@ class Model {
     return false;
   }
 
-  bool PaddleEngineInit(const std::string& model_filename,
-                        const std::string& params_filename,
-                        bool use_gpu = false, int gpu_id = 0,
-                        bool use_mkl = true, int mkl_thread_num = 8);
-
-  bool TritonEngineInit(const std::string& url,
-                        const std::string& model_name,
-                        const std::string& model_version,
-                        bool verbose = false);
-
-  bool TensorRTInit(const std::string& model_file,
-                    const std::string& cfg_file,
-                    const int gpu_id = 0,
-                    const bool save_engine = false,
-                    std::string trt_cache_file = "");
+  bool PaddleEngineInit(const PaddleEngineConfig& engine_config);
+
+  bool TritonEngineInit(const TritonEngineConfig& engine_config);
+
+  bool TensorRTInit(const TensorRTEngineConfig& engine_config);
 
   virtual bool PostprocessInit() {
     postprocess_ = nullptr;
@@ -104,12 +94,15 @@ class Model {
     std::vector<DataBlob> inputs;
     std::vector<DataBlob> outputs;
 
-    if (!preprocess_->Run(&imgs_clone, &inputs, &shape_infos, thread_num))
+    if (!preprocess_->Run(&imgs_clone, &inputs, &shape_infos, thread_num)) {
       return false;
-    if (!infer_engine_->Infer(inputs, &outputs))
+    }
+    if (!infer_engine_->Infer(inputs, &outputs)) {
       return false;
-    if (!postprocess_->Run(outputs, shape_infos, results, thread_num))
+    }
+    if (!postprocess_->Run(outputs, shape_infos, results, thread_num)) {
       return false;
+    }
     return true;
   }
 

+ 5 - 6
dygraph/deploy/cpp/model_deploy/common/include/multi_gpu_model.h

@@ -18,6 +18,7 @@
 #include <vector>
 
 #include "model_deploy/common/include/model_factory.h"
+#include "model_deploy/engine/include/engine.h"
 
 namespace PaddleDeploy {
 class MultiGPUModel {
@@ -49,8 +50,7 @@ class MultiGPUModel {
     return true;
   }
 
-  bool PaddleEngineInit(const std::string& model_filename,
-                        const std::string& params_filename,
+  bool PaddleEngineInit(PaddleEngineConfig engine_config,
                         const std::vector<int> gpu_ids) {
     if (gpu_ids.size() != models_.size()) {
       std::cerr << "Paddle Engine Init gpu_ids != MultiGPUModel Init gpu_num"
@@ -58,11 +58,10 @@ class MultiGPUModel {
                 << std::endl;
       return false;
     }
+    engine_config.use_gpu = true;
     for (auto i = 0; i < gpu_ids.size(); ++i) {
-      if (!models_[i]->PaddleEngineInit(model_filename,
-                                        params_filename,
-                                        true, gpu_ids[i],
-                                        true)) {
+      engine_config.gpu_id = gpu_ids[i];
+      if (!models_[i]->PaddleEngineInit(engine_config)) {
         std::cerr << "Paddle Engine Init error:" << gpu_ids[i] << std::endl;
         return false;
       }
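
A hypothetical call site for the new signature; `Init(model_type, cfg_file, gpu_num)` is assumed from `demo/multi_gpu_model_infer.cpp`, and the model paths reuse the YOLOv3 example from the docs above.

```cpp
#include <string>
#include <vector>

#include "model_deploy/common/include/multi_gpu_model.h"

// Sketch: initialize one model replica per GPU with a single shared config.
bool InitMultiGpu(PaddleDeploy::MultiGPUModel* model) {
  std::vector<int> gpu_ids = {0, 1};
  // Init(model_type, cfg_file, gpu_num) is assumed from the multi-GPU demo.
  if (!model->Init("det",
                   "inference_model/yolov3_darknet53_270e_coco/infer_cfg.yml",
                   gpu_ids.size())) {
    return false;
  }
  // use_gpu / gpu_id are filled in per card by PaddleEngineInit above.
  PaddleDeploy::PaddleEngineConfig engine_config;
  engine_config.model_filename =
      "inference_model/yolov3_darknet53_270e_coco/model.pdmodel";
  engine_config.params_filename =
      "inference_model/yolov3_darknet53_270e_coco/model.pdiparams";
  return model->PaddleEngineInit(engine_config, gpu_ids);
}
```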

+ 1 - 0
dygraph/deploy/cpp/model_deploy/common/include/paddle_deploy.h

@@ -18,6 +18,7 @@
 
 #include "model_deploy/common/include/output_struct.h"
 #include "model_deploy/common/include/model_factory.h"
+#include "model_deploy/engine/include/engine.h"
 
 namespace PaddleDeploy {
 inline std::shared_ptr<Model> CreateModel(const std::string &name) {

+ 21 - 4
dygraph/deploy/cpp/model_deploy/engine/include/engine_config.h

@@ -48,7 +48,7 @@ struct PaddleEngineConfig {
   bool use_trt = false;
 
   //  Set batchsize
-  int batch_size = 1;
+  int max_batch_size = 1;
 
   //  Set TensorRT min_subgraph_size
   int min_subgraph_size = 1;
@@ -65,13 +65,25 @@ struct PaddleEngineConfig {
 
   //  Is offline calibration required, when tensorrt is used
   bool use_calib_mode = false;
+
+  //  TensorRT workspace size
+  int max_workspace_size = 1 << 10;
+
+  //  TensorRT dynamic shape: minimum input shape
+  std::map<std::string, std::vector<int>> min_input_shape;
+
+  //  TensorRT dynamic shape: maximum input shape
+  std::map<std::string, std::vector<int>> max_input_shape;
+
+  //  TensorRT dynamic shape: optimal input shape
+  std::map<std::string, std::vector<int>> optim_input_shape;
 };
 
 struct TritonEngineConfig {
   TritonEngineConfig() : model_name_(""), model_version_(""),
         request_id_(""), sequence_id_(0), sequence_start_(false),
         sequence_end_(false), priority_(0), server_timeout_(0),
-        client_timeout_(0) {}
+        client_timeout_(0), verbose_(false), url_("") {}
   /// The name of the model to run inference.
   std::string model_name_;
   /// The version of the model to use while running inference. The default
@@ -118,14 +130,19 @@ struct TritonEngineConfig {
   // InferenceServerGrpcClient::StartStream().
   uint64_t client_timeout_;
 
-  bool verbose_ = false;
+  // enable verbose client logging
+  bool verbose_;
 
+  // URL of the inference server that requests are sent to
   std::string url_;
 };
 
 struct TensorRTEngineConfig {
   // onnx model path
-  std::string model_file_;
+  std::string model_file_ = "";
+
+  // paddle model config file
+  std::string cfg_file_ = "";
 
   // GPU workspace size
   int max_workspace_size_ = 1<<28;
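
As a usage sketch, the new TensorRT-related fields might be filled like this before calling `Model::PaddleEngineInit`; the input tensor name and shape ranges are illustrative values, not taken from any particular model.

```cpp
#include <memory>

#include "model_deploy/common/include/paddle_deploy.h"

// Sketch: enable the TensorRT backend of Paddle Inference with dynamic shapes.
// The input name "image" and the shape ranges are illustrative values only.
bool InitWithTensorRT(std::shared_ptr<PaddleDeploy::Model> model) {
  PaddleDeploy::PaddleEngineConfig engine_config;
  engine_config.model_filename = "inference_model/model.pdmodel";     // placeholder
  engine_config.params_filename = "inference_model/model.pdiparams";  // placeholder
  engine_config.use_gpu = true;
  engine_config.use_trt = true;
  engine_config.max_batch_size = 1;
  engine_config.max_workspace_size = 1 << 30;  // raise the 1 << 10 default

  // A non-empty min_input_shape triggers SetTRTDynamicShapeInfo
  // (see the ppinference_engine.cpp hunk below).
  engine_config.min_input_shape["image"] = {1, 3, 224, 224};
  engine_config.max_input_shape["image"] = {1, 3, 1024, 1024};
  engine_config.optim_input_shape["image"] = {1, 3, 640, 640};

  return model->PaddleEngineInit(engine_config);
}
```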

+ 1 - 0
dygraph/deploy/cpp/model_deploy/engine/include/tensorrt_engine.h

@@ -95,6 +95,7 @@ class TensorRTInferenceEngine : public InferEngine {
              std::vector<DataBlob>* output_blobs);
 
   std::shared_ptr<nvinfer1::ICudaEngine> engine_{nullptr};
+  std::shared_ptr<nvinfer1::IExecutionContext> context_;
   NaiveLogger logger_;
 
  private:

+ 10 - 11
dygraph/deploy/cpp/model_deploy/engine/src/ppinference_engine.cpp

@@ -15,17 +15,10 @@
 #include "model_deploy/engine/include/ppinference_engine.h"
 
 namespace PaddleDeploy {
-bool Model::PaddleEngineInit(const std::string& model_filename,
-                             const std::string& params_filename, bool use_gpu,
-                             int gpu_id, bool use_mkl, int mkl_thread_num) {
+bool Model::PaddleEngineInit(const PaddleEngineConfig& engine_config) {
   infer_engine_ = std::make_shared<PaddleInferenceEngine>();
   InferenceConfig config("paddle");
-  config.paddle_config->model_filename = model_filename;
-  config.paddle_config->params_filename = params_filename;
-  config.paddle_config->use_gpu = use_gpu;
-  config.paddle_config->gpu_id = gpu_id;
-  config.paddle_config->use_mkl = use_mkl;
-  config.paddle_config->mkl_thread_num = mkl_thread_num;
+  *(config.paddle_config) = engine_config;
   return infer_engine_->Init(config);
 }
 
@@ -61,12 +54,18 @@ bool PaddleInferenceEngine::Init(const InferenceConfig& infer_config) {
       return false;
     }
     config.EnableTensorRtEngine(
-        1 << 10 /* workspace_size*/,
-        engine_config.batch_size /* max_batch_size*/,
+        engine_config.max_workspace_size /* workspace_size*/,
+        engine_config.max_batch_size /* max_batch_size*/,
         engine_config.min_subgraph_size /* min_subgraph_size*/,
         precision /* precision*/,
         engine_config.use_static /* use_static*/,
         engine_config.use_calib_mode /* use_calib_mode*/);
+
+    if (engine_config.min_input_shape.size() != 0) {
+      config.SetTRTDynamicShapeInfo(engine_config.min_input_shape,
+                                    engine_config.max_input_shape,
+                                    engine_config.optim_input_shape);
+    }
   }
   predictor_ = std::move(paddle_infer::CreatePredictor(config));
   return true;

+ 16 - 19
dygraph/deploy/cpp/model_deploy/engine/src/tensorrt_engine.cpp

@@ -30,26 +30,22 @@ int DtypeConver(const nvinfer1::DataType& dtype) {
   return -1;
 }
 
-bool Model::TensorRTInit(const std::string& model_file,
-                         const std::string& cfg_file,
-                         const int gpu_id,
-                         const bool save_engine,
-                         std::string trt_cache_file) {
+bool Model::TensorRTInit(const TensorRTEngineConfig& engine_config) {
   infer_engine_ = std::make_shared<TensorRTInferenceEngine>();
   InferenceConfig config("tensorrt");
-  config.tensorrt_config->model_file_ = model_file;
-  config.tensorrt_config->gpu_id_ = gpu_id;
-  config.tensorrt_config->save_engine_ = save_engine;
-  config.tensorrt_config->trt_cache_file_ = trt_cache_file;
-  config.tensorrt_config->yaml_config_ = YAML::LoadFile(cfg_file);
-  if (!config.tensorrt_config->yaml_config_["input"].IsDefined()) {
+
+  YAML::Node node = YAML::LoadFile(engine_config.cfg_file_);
+  if (!node["input"].IsDefined()) {
     std::cout << "Fail to find input in yaml file!" << std::endl;
     return false;
   }
-  if (!config.tensorrt_config->yaml_config_["output"].IsDefined()) {
+  if (!node["output"].IsDefined()) {
     std::cout << "Fail to find output in yaml file!" << std::endl;
     return false;
   }
+
+  *(config.tensorrt_config) = engine_config;
+  config.tensorrt_config->yaml_config_ = node;
   return infer_engine_->Init(config);
 }
 
@@ -130,6 +126,13 @@ bool TensorRTInferenceEngine::Init(const InferenceConfig& engine_config) {
                                                    *config),
                     InferDeleter());
 
+  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
+                    engine_->createExecutionContext(),
+                    InferDeleter());
+  if (!context_) {
+    return false;
+  }
+
   if (tensorrt_config.save_engine_) {
     if (!SaveEngine(*(engine_.get()), tensorrt_config.trt_cache_file_)) {
       std::cout << "Fail save Trt Engine to "
@@ -221,16 +224,10 @@ bool TensorRTInferenceEngine::SaveEngine(const nvinfer1::ICudaEngine& engine,
 
 bool TensorRTInferenceEngine::Infer(const std::vector<DataBlob>& input_blobs,
                                     std::vector<DataBlob>* output_blobs) {
-  auto context = InferUniquePtr<nvinfer1::IExecutionContext>(
-                     engine_->createExecutionContext());
-  if (!context) {
-    return false;
-  }
-
   TensorRT::BufferManager buffers(engine_);
   FeedInput(input_blobs, buffers);
   buffers.copyInputToDevice();
-  bool status = context->executeV2(buffers.getDeviceBindings().data());
+  bool status = context_->executeV2(buffers.getDeviceBindings().data());
   if (!status) {
     return false;
   }
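
For the standalone ONNX TensorRT backend, a minimal sketch of the struct-based initialization, assuming placeholder file paths; `cfg_file_` must contain the `input`/`output` sections checked above.

```cpp
#include <memory>

#include "model_deploy/common/include/paddle_deploy.h"

// Sketch: initialize the standalone ONNX TensorRT engine via the new struct.
// The ONNX model path, yaml file and cache path are placeholders.
bool InitOnnxTensorRT(std::shared_ptr<PaddleDeploy::Model> model) {
  PaddleDeploy::TensorRTEngineConfig engine_config;
  engine_config.model_file_ = "yolov3.onnx";   // ONNX model exported beforehand
  engine_config.cfg_file_ = "infer_cfg.yml";   // must define "input" and "output"
  engine_config.gpu_id_ = 0;
  engine_config.max_workspace_size_ = 1 << 28;
  // Optionally serialize the built engine so later runs skip the build step.
  engine_config.save_engine_ = true;
  engine_config.trt_cache_file_ = "yolov3_trt.engine";
  return model->TensorRTInit(engine_config);
}
```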

+ 2 - 8
dygraph/deploy/cpp/model_deploy/engine/src/triton_engine.cpp

@@ -51,16 +51,10 @@ int DtypeToInt(std::string dtype) {
   }
 }
 
-bool Model::TritonEngineInit(const std::string& url,
-                             const std::string& model_name,
-                             const std::string& model_version,
-                             bool verbose) {
+bool Model::TritonEngineInit(const TritonEngineConfig& engine_config) {
   infer_engine_ = std::make_shared<TritonInferenceEngine>();
   InferenceConfig config("triton");
-  config.triton_config->url_ = url;
-  config.triton_config->model_name_ = model_name;
-  config.triton_config->model_version_ = model_version;
-  config.triton_config->verbose_ = verbose;
+  *(config.triton_config) = engine_config;
   infer_engine_->Init(config);
 }
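
Similarly, a minimal sketch of the struct-based Triton initialization, assuming a Triton Inference Server is already running at the placeholder address.

```cpp
#include <memory>

#include "model_deploy/common/include/paddle_deploy.h"

// Sketch: point the Triton backend at a running Triton Inference Server.
// The URL and model name are placeholders for an actual deployment.
bool InitTriton(std::shared_ptr<PaddleDeploy::Model> model) {
  PaddleDeploy::TritonEngineConfig engine_config;
  engine_config.url_ = "localhost:8001";  // gRPC endpoint of the server
  engine_config.model_name_ = "yolov3";   // name inside the Triton model repository
  engine_config.model_version_ = "1";
  engine_config.verbose_ = false;
  return model->TritonEngineInit(engine_config);
}
```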
 

+ 5 - 0
dygraph/deploy/cpp/model_deploy/ppseg/include/seg_postprocess.h

@@ -38,10 +38,15 @@ class SegPostprocess : public BasePostprocess {
   virtual bool Run(const std::vector<DataBlob>& outputs,
                    const std::vector<ShapeInfo>& shape_infos,
                    std::vector<Result>* results, int thread_num = 1);
+
   void RestoreSegMap(const ShapeInfo& shape_info,
                      cv::Mat* label_mat,
                      cv::Mat* score_mat,
                      SegResult* result);
+
+  bool RunV2(const DataBlob& outputs,
+             const std::vector<ShapeInfo>& shape_infos,
+             std::vector<Result>* results, int thread_num);
 };
 
 }  // namespace PaddleDeploy

+ 32 - 0
dygraph/deploy/cpp/model_deploy/ppseg/src/seg_postprocess.cpp

@@ -59,6 +59,33 @@ void SegPostprocess::RestoreSegMap(const ShapeInfo& shape_info,
     score_mat->begin<float>(), score_mat->end<float>());
 }
 
+bool SegPostprocess::RunV2(const DataBlob& output,
+                           const std::vector<ShapeInfo>& shape_infos,
+                           std::vector<Result>* results, int thread_num) {
+  int batch_size = shape_infos.size();
+  std::vector<int> score_map_shape = output.shape;
+  int score_map_size = std::accumulate(output.shape.begin() + 1,
+                                       output.shape.end(), 1,
+                                       std::multiplies<int>());
+  const uint8_t* score_map_data =
+          reinterpret_cast<const uint8_t*>(output.data.data());
+  int num_map_pixels = output.shape[1] * output.shape[2];
+
+  for (int i = 0; i < batch_size; ++i) {
+    (*results)[i].model_type = "seg";
+    (*results)[i].seg_result = new SegResult();
+    const uint8_t* current_start_ptr = score_map_data + i * score_map_size;
+    cv::Mat score_mat(output.shape[1], output.shape[2],
+                      CV_32FC1, cv::Scalar(1.0));
+    cv::Mat label_mat(output.shape[1], output.shape[2],
+                      CV_8UC1, const_cast<uint8_t*>(current_start_ptr));
+
+    RestoreSegMap(shape_infos[i], &label_mat,
+                &score_mat, (*results)[i].seg_result);
+  }
+  return true;
+}
+
 bool SegPostprocess::Run(const std::vector<DataBlob>& outputs,
                          const std::vector<ShapeInfo>& shape_infos,
                          std::vector<Result>* results, int thread_num) {
@@ -72,6 +99,11 @@ bool SegPostprocess::Run(const std::vector<DataBlob>& outputs,
     index = 1;
   }
   std::vector<int> score_map_shape = outputs[index].shape;
+  // PaddleSeg version >= 2.1: the output shape is [b, h, w]
+  if (score_map_shape.size() == 3) {
+    return RunV2(outputs[index], shape_infos, results, thread_num);
+  }
+
   int score_map_size = std::accumulate(score_map_shape.begin() + 1,
                     score_map_shape.end(), 1, std::multiplies<int>());
   const float* score_map_data =

+ 1 - 1
dygraph/deploy/cpp/scripts/tensorrt_build.sh

@@ -100,7 +100,7 @@ rm -rf log
 rm -rf build
 mkdir -p build
 cd build
-cmake ../demo/tensorrt/ \
+cmake ../demo/onnx_tensorrt/ \
     -DTENSORRT_DIR=${TENSORRT_DIR} \
     -DTENSORRT_HEADER=${TENSORRT_HEADER} \
     -DCUDA_DIR=${CUDA_DIR} \

+ 1 - 1
dygraph/deploy/cpp/scripts/triton_build.sh

@@ -80,7 +80,7 @@ fi
 rm -rf build
 mkdir -p build
 cd build
-cmake ../demo/triton/ \
+cmake ../demo/onnx_triton/ \
     -DTRITON_CLIENT=${TRITON_CLIENT} \
     -DOPENCV_DIR=${OPENCV_DIR}  \
     -DGLOG_DIR=${GLOG_DIR} \